/*
 * Copyright (C) 2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "compiler.h"
#include "midgard_ops.h"

void mir_rewrite_index_src_single(midgard_instruction *ins, unsigned old, unsigned new)
{
        for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) {
                if (ins->src[i] == old)
                        ins->src[i] = new;
        }
}

void mir_rewrite_index_dst_single(midgard_instruction *ins, unsigned old, unsigned new)
{
        if (ins->dest == old)
                ins->dest = new;
}

static void
mir_rewrite_index_src_single_swizzle(midgard_instruction *ins, unsigned old, unsigned new, unsigned *swizzle)
{
        for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i) {
                if (ins->src[i] != old) continue;

                ins->src[i] = new;
                mir_compose_swizzle(ins->swizzle[i], swizzle, ins->swizzle[i]);
        }
}

void
mir_rewrite_index_src(compiler_context *ctx, unsigned old, unsigned new)
{
        mir_foreach_instr_global(ctx, ins) {
                mir_rewrite_index_src_single(ins, old, new);
        }
}

void
mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned *swizzle)
{
        mir_foreach_instr_global(ctx, ins) {
                mir_rewrite_index_src_single_swizzle(ins, old, new, swizzle);
        }
}

void
mir_rewrite_index_dst(compiler_context *ctx, unsigned old, unsigned new)
{
        mir_foreach_instr_global(ctx, ins) {
                mir_rewrite_index_dst_single(ins, old, new);
        }

        /* Implicitly written before the shader */
        if (ctx->blend_input == old)
                ctx->blend_input = new;
}

void
mir_rewrite_index(compiler_context *ctx, unsigned old, unsigned new)
{
        mir_rewrite_index_src(ctx, old, new);
        mir_rewrite_index_dst(ctx, old, new);
}

unsigned
mir_use_count(compiler_context *ctx, unsigned value)
{
        unsigned used_count = 0;

        mir_foreach_instr_global(ctx, ins) {
                if (mir_has_arg(ins, value))
                        ++used_count;
        }

        return used_count;
}

/* Checks if a value is used only once (or is totally dead), which is an
 * important heuristic to figure out if certain optimizations are Worth It (TM) */

bool
mir_single_use(compiler_context *ctx, unsigned value)
{
        /* We can replicate constants in places so who cares */
        if (value == SSA_FIXED_REGISTER(REGISTER_CONSTANT))
                return true;

        return mir_use_count(ctx, value) <= 1;
}

bool
mir_nontrivial_mod(midgard_instruction *ins, unsigned i, bool check_swizzle)
{
        bool is_int = midgard_is_integer_op(ins->alu.op);

        if (is_int) {
                if (ins->src_shift[i]) return true;
        } else {
                if (ins->src_neg[i]) return true;
                if (ins->src_abs[i]) return true;
        }

        if (ins->dest_type != ins->src_types[i]) return true;

        if (check_swizzle) {
                for (unsigned c = 0; c < 16; ++c) {
                        if (!(ins->mask & (1 << c))) continue;
                        if (ins->swizzle[i][c] != c) return true;
                }
        }

        return false;
}

bool
mir_nontrivial_outmod(midgard_instruction *ins)
{
        bool is_int = midgard_is_integer_op(ins->alu.op);
        unsigned mod = ins->alu.outmod;

        /* Type conversion is a sort of outmod */
        if (ins->dest_type != ins->src_types[1])
                return true;

        if (is_int)
                return mod != midgard_outmod_int_wrap;
        else
                return mod != midgard_outmod_none;
}

/* 128 / sz = exp2(log2(128 / sz))
 *          = exp2(log2(128) - log2(sz))
 *          = exp2(7 - log2(sz))
 *          = 1 << (7 - log2(sz))
 */

unsigned
mir_components_for_bits(unsigned bits)
{
        return 1 << (7 - util_logbase2(bits));
}

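/* Illustrative example: a 32-bit type gives 1 << (7 - 5) = 4 components, since
 * a 128-bit vector holds four 32-bit channels; a 16-bit type gives
 * 1 << (7 - 4) = 8. */
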
unsigned
mir_components_for_type(nir_alu_type T)
{
        unsigned sz = nir_alu_type_get_type_size(T);
        return mir_components_for_bits(sz);
}

/* Converts a per-byte mask to a per-component mask, asserting that the mask
 * is component-aligned (all bytes within a component must agree) */

uint16_t
mir_from_bytemask(uint16_t bytemask, unsigned bits)
{
        unsigned value = 0;
        unsigned count = bits / 8;

        for (unsigned c = 0, d = 0; c < 16; c += count, ++d) {
                bool a = (bytemask & (1 << c)) != 0;

                /* Every byte of the component must agree with its first byte */
                for (unsigned q = c; q < c + count; ++q)
                        assert(((bytemask & (1 << q)) != 0) == a);

                value |= (a << d);
        }

        return value;
}

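/* Illustrative example: with bits = 32 (count = 4), a bytemask of 0x00F0
 * marks bytes 4-7, i.e. exactly the second 32-bit component, so the resulting
 * component mask is 0b0010. */
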
/* Rounds up a bytemask to fill a given component count. Iterates over each
 * component, and checks if any bytes in the component are masked on */

uint16_t
mir_round_bytemask_up(uint16_t mask, unsigned bits)
{
        unsigned bytes = bits / 8;
        unsigned maxmask = mask_of(bytes);
        unsigned channels = mir_components_for_bits(bits);

        for (unsigned c = 0; c < channels; ++c) {
                unsigned submask = maxmask << (c * bytes);

                if (mask & submask)
                        mask |= submask;
        }

        return mask;
}

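/* Illustrative example: with bits = 16 (bytes = 2), a mask of 0b0001 touches
 * only the low byte of component 0, so it rounds up to 0b0011, covering the
 * whole 16-bit component. */
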
/* Grabs the per-byte mask of an instruction (as opposed to per-component) */

uint16_t
mir_bytemask(midgard_instruction *ins)
{
        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
        return pan_to_bytemask(type_size, ins->mask);
}

void
mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask)
{
        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
        ins->mask = mir_from_bytemask(bytemask, type_size);
}

/* Checks if we should use an upper destination override, rather than the lower
 * one in the IR. Returns -1 if there is nothing to override, zero if the lower
 * half is used, or the component shift if the upper half is used */

signed
mir_upper_override(midgard_instruction *ins, unsigned inst_size)
{
        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);

        /* If the sizes are the same, there's nothing to override */
        if (type_size == inst_size)
                return -1;

        /* There are 16 bytes per vector, so there are (16/bytes)
         * components per vector. So the magic half is half of
         * (16/bytes), which simplifies to 8/bytes = 8 / (bits / 8) = 64 / bits */

        unsigned threshold = mir_components_for_bits(type_size) >> 1;

        /* How many components did we shift over? */
        unsigned zeroes = __builtin_ctz(ins->mask);

        /* Did we hit the threshold? */
        return (zeroes >= threshold) ? threshold : 0;
}

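/* Illustrative example: a 16-bit destination within a 32-bit instruction has
 * mir_components_for_bits(16) = 8 components, so the threshold is 4. A
 * writemask of 0xF0 has __builtin_ctz(0xF0) = 4 trailing zeroes, which hits
 * the threshold: the write targets the upper half, so we return 4. */
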
/* Creates a mask of the components of a node read by an instruction, by
 * analyzing the swizzle with respect to the instruction's mask. E.g.:
 *
 * fadd r0.xz, r1.yyyy, r2.zwyx
 *
 * will return a mask of Z/Y for r2
 */

static uint16_t
mir_bytemask_of_read_components_single(unsigned *swizzle, unsigned inmask, unsigned bits)
{
        unsigned cmask = 0;

        for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) {
                if (!(inmask & (1 << c))) continue;
                cmask |= (1 << swizzle[c]);
        }

        return pan_to_bytemask(bits, cmask);
}

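/* Tracing the fadd example above: the writemask covers X and Z (components 0
 * and 2), and r2's swizzle .zwyx maps component 0 to Z and component 2 to Y,
 * so the read mask for r2 is Z|Y before conversion to bytes. */
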
uint16_t
mir_bytemask_of_read_components_index(midgard_instruction *ins, unsigned i)
{
        /* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */
        if (ins->compact_branch && ins->branch.conditional && (i == 0))
                return 0xF;

        /* ALU ops act componentwise so we need to pay attention to
         * their mask. Texture/ldst does not so we don't clamp source
         * readmasks based on the writemask */

        unsigned qmask = ~0;

        /* Handle dot products and things */
        if (ins->type == TAG_ALU_4 && !ins->compact_branch) {
                unsigned props = alu_opcode_props[ins->alu.op].props;
                unsigned channel_override = GET_CHANNEL_COUNT(props);

                if (channel_override)
                        qmask = mask_of(channel_override);
                else
                        qmask = ins->mask;
        }

        return mir_bytemask_of_read_components_single(ins->swizzle[i], qmask,
                        nir_alu_type_get_type_size(ins->src_types[i]));
}

uint16_t
mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node)
{
        uint16_t mask = 0;

        if (node == ~0)
                return 0;

        mir_foreach_src(ins, i) {
                if (ins->src[i] != node) continue;
                mask |= mir_bytemask_of_read_components_index(ins, i);
        }

        return mask;
}

/* Register allocation occurs after instruction scheduling, which is fine until
 * we start needing to spill registers and therefore insert instructions into
 * an already-scheduled program. We don't have to be terribly efficient about
 * this, since spilling is already slow. So just semantically we need to insert
 * the instruction into a new bundle before/after the bundle of the instruction
 * in question */

static midgard_bundle
mir_bundle_for_op(compiler_context *ctx, midgard_instruction ins)
{
        midgard_instruction *u = mir_upload_ins(ctx, ins);

        midgard_bundle bundle = {
                .tag = ins.type,
                .instruction_count = 1,
                .instructions = { u },
        };

        if (bundle.tag == TAG_ALU_4) {
                assert(OP_IS_MOVE(u->alu.op));
                u->unit = UNIT_VMUL;

                size_t bytes_emitted = sizeof(uint32_t) + sizeof(midgard_reg_info) + sizeof(midgard_vector_alu);
                bundle.padding = ~(bytes_emitted - 1) & 0xF;
                bundle.control = ins.type | u->unit;
        }

        return bundle;
}

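/* The padding computation above rounds the bundle size up to a 16-byte
 * boundary: e.g. if bytes_emitted were 12, then ~(12 - 1) & 0xF = 4, and
 * 12 + 4 = 16. */
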
static unsigned
mir_bundle_idx_for_ins(midgard_instruction *tag, midgard_block *block)
{
        midgard_bundle *bundles =
                (midgard_bundle *) block->bundles.data;

        size_t count = (block->bundles.size / sizeof(midgard_bundle));

        for (unsigned i = 0; i < count; ++i) {
                for (unsigned j = 0; j < bundles[i].instruction_count; ++j) {
                        if (bundles[i].instructions[j] == tag)
                                return i;
                }
        }

        mir_print_instruction(tag);
        unreachable("Instruction not scheduled in block");
}

void
mir_insert_instruction_before_scheduled(
        compiler_context *ctx,
        midgard_block *block,
        midgard_instruction *tag,
        midgard_instruction ins)
{
        unsigned before = mir_bundle_idx_for_ins(tag, block);
        size_t count = util_dynarray_num_elements(&block->bundles, midgard_bundle);
        UNUSED void *unused = util_dynarray_grow(&block->bundles, midgard_bundle, 1);

        /* Shift the target bundle and everything after it up a slot */
        midgard_bundle *bundles = (midgard_bundle *) block->bundles.data;
        memmove(bundles + before + 1, bundles + before, (count - before) * sizeof(midgard_bundle));
        midgard_bundle *before_bundle = bundles + before + 1;

        midgard_bundle new = mir_bundle_for_op(ctx, ins);
        memcpy(bundles + before, &new, sizeof(new));

        list_addtail(&new.instructions[0]->link, &before_bundle->instructions[0]->link);
        block->quadword_count += midgard_tag_props[new.tag].size;
}

void
mir_insert_instruction_after_scheduled(
        compiler_context *ctx,
        midgard_block *block,
        midgard_instruction *tag,
        midgard_instruction ins)
{
        /* We need to grow the bundles array to add our new bundle */
        size_t count = util_dynarray_num_elements(&block->bundles, midgard_bundle);
        UNUSED void *unused = util_dynarray_grow(&block->bundles, midgard_bundle, 1);

        /* Find the bundle that we want to insert after */
        unsigned after = mir_bundle_idx_for_ins(tag, block);

        /* All the bundles after that one, we move ahead by one */
        midgard_bundle *bundles = (midgard_bundle *) block->bundles.data;
        memmove(bundles + after + 2, bundles + after + 1, (count - after - 1) * sizeof(midgard_bundle));
        midgard_bundle *after_bundle = bundles + after;

        midgard_bundle new = mir_bundle_for_op(ctx, ins);
        memcpy(bundles + after + 1, &new, sizeof(new));
        list_add(&new.instructions[0]->link, &after_bundle->instructions[after_bundle->instruction_count - 1]->link);
        block->quadword_count += midgard_tag_props[new.tag].size;
}

/* Flips the first two arguments of a (binary) op. Currently ALU
 * only, no known uses for ldst/tex */

void
mir_flip(midgard_instruction *ins)
{
        unsigned temp = ins->src[0];
        ins->src[0] = ins->src[1];
        ins->src[1] = temp;

        assert(ins->type == TAG_ALU_4);

        temp = ins->alu.src1;
        ins->alu.src1 = ins->alu.src2;
        ins->alu.src2 = temp;

        temp = ins->src_types[0];
        ins->src_types[0] = ins->src_types[1];
        ins->src_types[1] = temp;

        temp = ins->src_abs[0];
        ins->src_abs[0] = ins->src_abs[1];
        ins->src_abs[1] = temp;

        temp = ins->src_neg[0];
        ins->src_neg[0] = ins->src_neg[1];
        ins->src_neg[1] = temp;

        temp = ins->src_invert[0];
        ins->src_invert[0] = ins->src_invert[1];
        ins->src_invert[1] = temp;

        unsigned temp_swizzle[16];
        memcpy(temp_swizzle, ins->swizzle[0], sizeof(ins->swizzle[0]));
        memcpy(ins->swizzle[0], ins->swizzle[1], sizeof(ins->swizzle[0]));
        memcpy(ins->swizzle[1], temp_swizzle, sizeof(ins->swizzle[0]));
}

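/* Flipping is typically useful when the second source slot has special
 * constraints (for example, an inline constant that must occupy a particular
 * source) and the operation is commutative, so swapping the operands is a
 * legal way to satisfy them. */
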
/* Before squashing, calculate ctx->temp_count just by observing the MIR */

void
mir_compute_temp_count(compiler_context *ctx)
{
        if (ctx->temp_count)
                return;

        unsigned max_dest = 0;

        mir_foreach_instr_global(ctx, ins) {
                if (ins->dest < SSA_FIXED_MINIMUM)
                        max_dest = MAX2(max_dest, ins->dest + 1);
        }

        ctx->temp_count = max_dest;
}