/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */
#include <stdlib.h>

#include "util/u_math.h"
#include "util/u_memory.h"
/* This pass promotes reads from uniforms from load/store ops to uniform
 * registers if it is beneficial to do so. Normally, this saves both
 * instructions and total register pressure, but it does take a toll on the
 * number of work registers that are available, so this is a balance.
 *
 * We use a heuristic to determine the ideal count, implemented by
 * mir_work_heuristic, which returns the ideal number of work registers.
 */
41 mir_is_promoteable_ubo(midgard_instruction
*ins
)
43 /* TODO: promote unaligned access via swizzle? */
45 return (ins
->type
== TAG_LOAD_STORE_4
) &&
46 (OP_IS_UBO_READ(ins
->op
)) &&
47 !(ins
->constants
.u32
[0] & 0xF) &&
48 !(ins
->load_store
.arg_1
) &&
49 (ins
->load_store
.arg_2
== 0x1E) &&
50 ((ins
->constants
.u32
[0] / 16) < 16);
54 mir_promoteable_uniform_count(compiler_context
*ctx
)
58 mir_foreach_instr_global(ctx
, ins
) {
59 if (mir_is_promoteable_ubo(ins
))
60 count
= MAX2(count
, ins
->constants
.u32
[0] / 16);
/* Sums the number of set bits over the first temp_count entries of live.
 *
 * NOTE(review): presumably each entry is a per-index mask of live bytes, so
 * the sum is a count of live bytes -- confirm against the liveness analysis.
 */

static unsigned
mir_count_live(uint16_t *live, unsigned temp_count)
{
        unsigned total = 0;

        for (unsigned idx = 0; idx < temp_count; ++idx)
                total += util_bitcount(live[idx]);

        return total;
}
78 mir_estimate_pressure(compiler_context
*ctx
)
80 mir_invalidate_liveness(ctx
);
81 mir_compute_liveness(ctx
);
83 unsigned max_live
= 0;
85 mir_foreach_block(ctx
, _block
) {
86 midgard_block
*block
= (midgard_block
*) _block
;
87 uint16_t *live
= mem_dup(block
->base
.live_out
, ctx
->temp_count
* sizeof(uint16_t));
89 mir_foreach_instr_in_block_rev(block
, ins
) {
90 unsigned count
= mir_count_live(live
, ctx
->temp_count
);
91 max_live
= MAX2(max_live
, count
);
92 mir_liveness_ins_update(live
, ins
, ctx
->temp_count
);
98 return DIV_ROUND_UP(max_live
, 16);
102 mir_work_heuristic(compiler_context
*ctx
)
104 unsigned uniform_count
= mir_promoteable_uniform_count(ctx
);
106 /* If there are 8 or fewer uniforms, it doesn't matter what we do, so
107 * allow as many work registers as needed */
109 if (uniform_count
<= 8)
112 /* Otherwise, estimate the register pressure */
114 unsigned pressure
= mir_estimate_pressure(ctx
);
116 /* Prioritize not spilling above all else. The relation between the
117 * pressure estimate and the actual register pressure is a little
118 * murkier than we might like (due to scheduling, pipeline registers,
119 * failure to pack vector registers, load/store registers, texture
120 * registers...), hence why this is a heuristic parameter */
125 /* If there's no chance of spilling, prioritize UBOs and thread count */
130 /* Bitset of indices that will be used as a special register -- inputs to a
131 * non-ALU op. We precompute this set so that testing is efficient, otherwise
132 * we end up O(mn) behaviour for n instructions and m uniform reads */
135 mir_special_indices(compiler_context
*ctx
)
137 mir_compute_temp_count(ctx
);
138 BITSET_WORD
*bset
= calloc(BITSET_WORDS(ctx
->temp_count
), sizeof(BITSET_WORD
));
140 mir_foreach_instr_global(ctx
, ins
) {
141 /* Look for special instructions */
142 bool is_ldst
= ins
->type
== TAG_LOAD_STORE_4
;
143 bool is_tex
= ins
->type
== TAG_TEXTURE_4
;
144 bool is_writeout
= ins
->compact_branch
&& ins
->writeout
;
146 if (!(is_ldst
|| is_tex
|| is_writeout
))
149 /* Anything read by a special instruction is itself special */
150 mir_foreach_src(ins
, i
) {
151 unsigned idx
= ins
->src
[i
];
153 if (idx
< ctx
->temp_count
)
154 BITSET_SET(bset
, idx
);
162 midgard_promote_uniforms(compiler_context
*ctx
)
164 unsigned work_count
= mir_work_heuristic(ctx
);
165 unsigned promoted_count
= 24 - work_count
;
167 /* First, figure out special indices a priori so we don't recompute a lot */
168 BITSET_WORD
*special
= mir_special_indices(ctx
);
170 mir_foreach_instr_global_safe(ctx
, ins
) {
171 if (!mir_is_promoteable_ubo(ins
)) continue;
173 unsigned off
= ins
->constants
.u32
[0];
174 unsigned address
= off
/ 16;
176 /* Check if it's a promotable range */
177 unsigned uniform_reg
= 23 - address
;
179 if (address
>= promoted_count
) continue;
181 /* It is, great! Let's promote */
183 ctx
->uniform_cutoff
= MAX2(ctx
->uniform_cutoff
, address
+ 1);
184 unsigned promoted
= SSA_FIXED_REGISTER(uniform_reg
);
186 /* We do need the move for safety for a non-SSA dest, or if
187 * we're being fed into a special class */
189 bool needs_move
= ins
->dest
& PAN_IS_REG
|| ins
->dest
== ctx
->blend_src1
;
191 if (ins
->dest
< ctx
->temp_count
)
192 needs_move
|= BITSET_TEST(special
, ins
->dest
);
195 unsigned type_size
= nir_alu_type_get_type_size(ins
->dest_type
);
196 midgard_instruction mov
= v_mov(promoted
, ins
->dest
);
197 mov
.dest_type
= nir_type_uint
| type_size
;
198 mov
.src_types
[0] = mov
.dest_type
;
200 uint16_t rounded
= mir_round_bytemask_up(mir_bytemask(ins
), type_size
);
201 mir_set_bytemask(&mov
, rounded
);
202 mir_insert_instruction_before(ctx
, ins
, mov
);
204 mir_rewrite_index_src(ctx
, ins
->dest
, promoted
);
207 mir_remove_instruction(ins
);