pan/midgard: Account for unaligned UBOs when promoting uniforms
[mesa.git] / src / panfrost / midgard / mir_promote_uniforms.c
/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "compiler.h"
#include "util/u_math.h"

/* This pass promotes uniform reads from load/store ops to register-mapped
 * uniforms when it is beneficial to do so. Normally, this saves both
 * instructions and total register pressure, but it does take a toll on the
 * number of work registers that are available, so this is a balance.
 *
 * To cope, we take as an argument the maximum work register pressure in the
 * program, so we always leave at least that many work registers available
 * and avoid forcing a spill. If we spill anyway, well, it's a lose-lose at
 * that point. */
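
/* For instance (an illustrative case, not the only promotable shape): a
 * direct, 16-byte-aligned ld_ubo_int4 of the first vec4 of UBO 0 can be
 * dropped entirely, with uses of its destination rewritten to the
 * register-mapped uniform (r23 for slot 0), or replaced by a single mov
 * when rewriting is unsafe. */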

static unsigned
mir_ubo_offset(midgard_instruction *ins)
{
        assert(ins->type == TAG_LOAD_STORE_4);
        assert(OP_IS_UBO_READ(ins->load_store.op));

        /* Grab the offset as the hw understands it */
        unsigned lo = ins->load_store.varying_parameters >> 7;
        unsigned hi = ins->load_store.address;
        unsigned raw = ((hi << 3) | lo);

        /* Account for the op's shift */
        unsigned shift = mir_ubo_shift(ins->load_store.op);
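
        /* Worked example, with made-up field values: lo = 1 and hi = 1 decode
         * to raw = (1 << 3) | 1 = 9; a vec4-granular read shifts by 4 (16-byte
         * units), for a byte offset of 9 << 4 = 144. */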
        return (raw << shift);
}

void
midgard_promote_uniforms(compiler_context *ctx, unsigned promoted_count)
{
        mir_foreach_instr_global_safe(ctx, ins) {
                if (ins->type != TAG_LOAD_STORE_4) continue;
                if (!OP_IS_UBO_READ(ins->load_store.op)) continue;

                /* Get the offset. TODO: can we promote unaligned access? */
                unsigned off = mir_ubo_offset(ins);
                if (off & 0xF) continue;

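                /* Each register-mapped uniform slot holds one 16-byte vec4, so
                 * an aligned byte offset maps directly to a vec4 slot index */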
                unsigned address = off / 16;

                /* Check this is UBO 0 */
                if (ins->load_store.arg_1) continue;

                /* Check we're accessing directly */
                if (ins->load_store.arg_2 != 0x1E) continue;

                /* Check if it's in the promotable range; register-mapped
                 * uniforms count down from r23 */
                unsigned uniform_reg = 23 - address;

                if (address >= promoted_count) continue;

                /* It is, great! Let's promote */

                ctx->uniform_cutoff = MAX2(ctx->uniform_cutoff, address + 1);
                unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);

                /* We do need the move for safety with a non-SSA dest, or if
                 * we're being fed into a special class */
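                /* (e.g. a destination feeding a texture op: RA must place it in
                 * the special texture register class, which a register-mapped
                 * uniform can't satisfy in place) */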

                bool needs_move = ins->ssa_args.dest & IS_REG;
                needs_move |= mir_special_index(ctx, ins->ssa_args.dest);

                /* Ensure this is a contiguous X-bound mask. It should be since
                 * we haven't done RA and per-component masked UBO reads don't
                 * make much sense. */

                assert(((ins->mask + 1) & ins->mask) == 0);
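                /* e.g. mask = 0x7 (xyz) passes: (0x7 + 1) & 0x7 == 0, while a
                 * sparse mask like 0x5 (xz) would fail: (0x5 + 1) & 0x5 == 0x4 */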

                /* Check the component count from the mask so we can set up a
                 * swizzle appropriately when promoting. The idea is to ensure
                 * the component count is preserved so RA can be smarter if we
                 * need to spill */

                unsigned nr_components = util_bitcount(ins->mask);

                if (needs_move) {
                        midgard_instruction mov = v_mov(promoted, blank_alu_src, ins->ssa_args.dest);
                        mov.mask = ins->mask;
                        mir_insert_instruction_before(ins, mov);
                } else {
                        mir_rewrite_index_src_swizzle(ctx, ins->ssa_args.dest,
                                        promoted, swizzle_of(nr_components));
                }

                mir_remove_instruction(ins);
        }
}