v3d: Stop tracking num_inputs for VPM loads.
[mesa.git] / src / broadcom / compiler / vir_opt_dead_code.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * @file vir_opt_dead_code.c
26 *
27 * This is a simple dead code eliminator for SSA values in VIR.
28 *
29 * It walks all the instructions finding what temps are used, then walks again
30 * to remove instructions writing unused temps.
31 *
32 * This is an inefficient implementation if you have long chains of
33 * instructions where the entire chain is dead, but we expect those to have
34 * been eliminated at the NIR level, and here we're just cleaning up small
35 * problems produced by NIR->VIR.
36 */
37
38 #include "v3d_compiler.h"
39
40 static bool debug; /* Trace switch: when true, each removal this pass makes is reported on stderr. */
41
42 static void
43 dce(struct v3d_compile *c, struct qinst *inst)
44 {
45 if (debug) {
46 fprintf(stderr, "Removing: ");
47 vir_dump_inst(c, inst);
48 fprintf(stderr, "\n");
49 }
50 assert(!v3d_qpu_writes_flags(&inst->qpu));
51 vir_remove_instruction(c, inst);
52 }
53
54 static bool
55 has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst)
56 {
57 for (int i = 0; i < vir_get_nsrc(inst); i++) {
58 if (inst->src[i].file == QFILE_VPM) {
59 /* Instance ID, Vertex ID: Should have been removed at
60 * the NIR level
61 */
62 if (inst->src[i].index == ~0)
63 return true;
64
65 uint32_t attr = inst->src[i].index / 4;
66 uint32_t offset = inst->src[i].index % 4;
67
68 if (c->vattr_sizes[attr] != offset)
69 return true;
70
71 /* Can't get rid of the last VPM read, or the
72 * simulator (at least) throws an error.
73 */
74 uint32_t total_size = 0;
75 for (uint32_t i = 0; i < ARRAY_SIZE(c->vattr_sizes); i++)
76 total_size += c->vattr_sizes[i];
77 if (total_size == 1)
78 return true;
79 }
80 }
81
82 return false;
83 }
84
85 static bool
86 can_write_to_null(struct v3d_compile *c, struct qinst *inst)
87 {
88 /* The SFU instructions must write to a physical register. */
89 if (c->devinfo->ver >= 41 && v3d_qpu_uses_sfu(&inst->qpu))
90 return false;
91
92 return true;
93 }
94
95 static void
96 vir_dce_flags(struct v3d_compile *c, struct qinst *inst)
97 {
98 if (debug) {
99 fprintf(stderr,
100 "Removing flags write from: ");
101 vir_dump_inst(c, inst);
102 fprintf(stderr, "\n");
103 }
104
105 assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
106
107 inst->qpu.flags.apf = V3D_QPU_PF_NONE;
108 inst->qpu.flags.mpf = V3D_QPU_PF_NONE;
109 inst->qpu.flags.auf = V3D_QPU_UF_NONE;
110 inst->qpu.flags.muf = V3D_QPU_UF_NONE;
111 }
112
113 bool
114 vir_opt_dead_code(struct v3d_compile *c)
115 {
116 bool progress = false;
117 bool *used = calloc(c->num_temps, sizeof(bool));
118
119 /* Defuse the "are you removing the cursor?" assertion in the core.
120 * You'll need to set up a new cursor for any new instructions after
121 * doing DCE (which we would expect, anyway).
122 */
123 c->cursor.link = NULL;
124
125 vir_for_each_inst_inorder(inst, c) {
126 for (int i = 0; i < vir_get_nsrc(inst); i++) {
127 if (inst->src[i].file == QFILE_TEMP)
128 used[inst->src[i].index] = true;
129 }
130 }
131
132 vir_for_each_block(block, c) {
133 struct qinst *last_flags_write = NULL;
134
135 vir_for_each_inst_safe(inst, block) {
136 /* If this instruction reads the flags, we can't
137 * remove the flags generation for it.
138 */
139 if (v3d_qpu_reads_flags(&inst->qpu))
140 last_flags_write = NULL;
141
142 if (inst->dst.file != QFILE_NULL &&
143 !(inst->dst.file == QFILE_TEMP &&
144 !used[inst->dst.index])) {
145 continue;
146 }
147
148 if (vir_has_side_effects(c, inst))
149 continue;
150
151 if (v3d_qpu_writes_flags(&inst->qpu)) {
152 /* If we obscure a previous flags write,
153 * drop it.
154 */
155 if (last_flags_write &&
156 (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
157 inst->qpu.flags.mpf != V3D_QPU_PF_NONE)) {
158 vir_dce_flags(c, last_flags_write);
159 progress = true;
160 }
161
162 last_flags_write = inst;
163 }
164
165 if (v3d_qpu_writes_flags(&inst->qpu) ||
166 has_nonremovable_reads(c, inst)) {
167 /* If we can't remove the instruction, but we
168 * don't need its destination value, just
169 * remove the destination. The register
170 * allocator would trivially color it and it
171 * wouldn't cause any register pressure, but
172 * it's nicer to read the VIR code without
173 * unused destination regs.
174 */
175 if (inst->dst.file == QFILE_TEMP &&
176 can_write_to_null(c, inst)) {
177 if (debug) {
178 fprintf(stderr,
179 "Removing dst from: ");
180 vir_dump_inst(c, inst);
181 fprintf(stderr, "\n");
182 }
183 c->defs[inst->dst.index] = NULL;
184 inst->dst.file = QFILE_NULL;
185 progress = true;
186 }
187 continue;
188 }
189
190 for (int i = 0; i < vir_get_nsrc(inst); i++) {
191 if (inst->src[i].file != QFILE_VPM)
192 continue;
193 uint32_t attr = inst->src[i].index / 4;
194 uint32_t offset = (inst->src[i].index % 4);
195
196 if (c->vattr_sizes[attr] == offset) {
197 c->vattr_sizes[attr]--;
198 }
199 }
200
201 assert(inst != last_flags_write);
202 dce(c, inst);
203 progress = true;
204 continue;
205 }
206 }
207
208 free(used);
209
210 return progress;
211 }