i965: clip: Change computation of nr_regs to use VUE map.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4.cpp
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4.h"
25 extern "C" {
26 #include "main/macros.h"
27 #include "program/prog_parameter.h"
28 }
29
30 #define MAX_INSTRUCTION (1 << 30)
31
32 namespace brw {
33
34 void
35 vec4_visitor::calculate_live_intervals()
36 {
37 int *def = ralloc_array(mem_ctx, int, virtual_grf_count);
38 int *use = ralloc_array(mem_ctx, int, virtual_grf_count);
39 int loop_depth = 0;
40 int loop_start = 0;
41
42 if (this->live_intervals_valid)
43 return;
44
45 for (int i = 0; i < virtual_grf_count; i++) {
46 def[i] = MAX_INSTRUCTION;
47 use[i] = -1;
48 }
49
50 int ip = 0;
51 foreach_list(node, &this->instructions) {
52 vec4_instruction *inst = (vec4_instruction *)node;
53
54 if (inst->opcode == BRW_OPCODE_DO) {
55 if (loop_depth++ == 0)
56 loop_start = ip;
57 } else if (inst->opcode == BRW_OPCODE_WHILE) {
58 loop_depth--;
59
60 if (loop_depth == 0) {
61 /* Patches up the use of vars marked for being live across
62 * the whole loop.
63 */
64 for (int i = 0; i < virtual_grf_count; i++) {
65 if (use[i] == loop_start) {
66 use[i] = ip;
67 }
68 }
69 }
70 } else {
71 for (unsigned int i = 0; i < 3; i++) {
72 if (inst->src[i].file == GRF) {
73 int reg = inst->src[i].reg;
74
75 if (!loop_depth) {
76 use[reg] = ip;
77 } else {
78 def[reg] = MIN2(loop_start, def[reg]);
79 use[reg] = loop_start;
80
81 /* Nobody else is going to go smash our start to
82 * later in the loop now, because def[reg] now
83 * points before the bb header.
84 */
85 }
86 }
87 }
88 if (inst->dst.file == GRF) {
89 int reg = inst->dst.reg;
90
91 if (!loop_depth) {
92 def[reg] = MIN2(def[reg], ip);
93 } else {
94 def[reg] = MIN2(def[reg], loop_start);
95 }
96 }
97 }
98
99 ip++;
100 }
101
102 ralloc_free(this->virtual_grf_def);
103 ralloc_free(this->virtual_grf_use);
104 this->virtual_grf_def = def;
105 this->virtual_grf_use = use;
106
107 this->live_intervals_valid = true;
108 }
109
110 bool
111 vec4_visitor::virtual_grf_interferes(int a, int b)
112 {
113 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
114 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
115
116 /* We can't handle dead register writes here, without iterating
117 * over the whole instruction stream to find every single dead
118 * write to that register to compare to the live interval of the
119 * other register. Just assert that dead_code_eliminate() has been
120 * called.
121 */
122 assert((this->virtual_grf_use[a] != -1 ||
123 this->virtual_grf_def[a] == MAX_INSTRUCTION) &&
124 (this->virtual_grf_use[b] != -1 ||
125 this->virtual_grf_def[b] == MAX_INSTRUCTION));
126
127 return start < end;
128 }
129
130 /**
131 * Must be called after calculate_live_intervales() to remove unused
132 * writes to registers -- register allocation will fail otherwise
133 * because something deffed but not used won't be considered to
134 * interfere with other regs.
135 */
136 bool
137 vec4_visitor::dead_code_eliminate()
138 {
139 bool progress = false;
140 int pc = 0;
141
142 calculate_live_intervals();
143
144 foreach_list_safe(node, &this->instructions) {
145 vec4_instruction *inst = (vec4_instruction *)node;
146
147 if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
148 inst->remove();
149 progress = true;
150 }
151
152 pc++;
153 }
154
155 if (progress)
156 live_intervals_valid = false;
157
158 return progress;
159 }
160
161 void
162 vec4_visitor::split_uniform_registers()
163 {
164 /* Prior to this, uniforms have been in an array sized according to
165 * the number of vector uniforms present, sparsely filled (so an
166 * aggregate results in reg indices being skipped over). Now we're
167 * going to cut those aggregates up so each .reg index is one
168 * vector. The goal is to make elimination of unused uniform
169 * components easier later.
170 */
171 foreach_list(node, &this->instructions) {
172 vec4_instruction *inst = (vec4_instruction *)node;
173
174 for (int i = 0 ; i < 3; i++) {
175 if (inst->src[i].file != UNIFORM)
176 continue;
177
178 assert(!inst->src[i].reladdr);
179
180 inst->src[i].reg += inst->src[i].reg_offset;
181 inst->src[i].reg_offset = 0;
182 }
183 }
184
185 /* Update that everything is now vector-sized. */
186 for (int i = 0; i < this->uniforms; i++) {
187 this->uniform_size[i] = 1;
188 }
189 }
190
191 void
192 vec4_visitor::pack_uniform_registers()
193 {
194 bool uniform_used[this->uniforms];
195 int new_loc[this->uniforms];
196 int new_chan[this->uniforms];
197
198 memset(uniform_used, 0, sizeof(uniform_used));
199 memset(new_loc, 0, sizeof(new_loc));
200 memset(new_chan, 0, sizeof(new_chan));
201
202 /* Find which uniform vectors are actually used by the program. We
203 * expect unused vector elements when we've moved array access out
204 * to pull constants, and from some GLSL code generators like wine.
205 */
206 foreach_list(node, &this->instructions) {
207 vec4_instruction *inst = (vec4_instruction *)node;
208
209 for (int i = 0 ; i < 3; i++) {
210 if (inst->src[i].file != UNIFORM)
211 continue;
212
213 uniform_used[inst->src[i].reg] = true;
214 }
215 }
216
217 int new_uniform_count = 0;
218
219 /* Now, figure out a packing of the live uniform vectors into our
220 * push constants.
221 */
222 for (int src = 0; src < uniforms; src++) {
223 int size = this->uniform_vector_size[src];
224
225 if (!uniform_used[src]) {
226 this->uniform_vector_size[src] = 0;
227 continue;
228 }
229
230 int dst;
231 /* Find the lowest place we can slot this uniform in. */
232 for (dst = 0; dst < src; dst++) {
233 if (this->uniform_vector_size[dst] + size <= 4)
234 break;
235 }
236
237 if (src == dst) {
238 new_loc[src] = dst;
239 new_chan[src] = 0;
240 } else {
241 new_loc[src] = dst;
242 new_chan[src] = this->uniform_vector_size[dst];
243
244 /* Move the references to the data */
245 for (int j = 0; j < size; j++) {
246 c->prog_data.param[dst * 4 + new_chan[src] + j] =
247 c->prog_data.param[src * 4 + j];
248 }
249
250 this->uniform_vector_size[dst] += size;
251 this->uniform_vector_size[src] = 0;
252 }
253
254 new_uniform_count = MAX2(new_uniform_count, dst + 1);
255 }
256
257 this->uniforms = new_uniform_count;
258
259 /* Now, update the instructions for our repacked uniforms. */
260 foreach_list(node, &this->instructions) {
261 vec4_instruction *inst = (vec4_instruction *)node;
262
263 for (int i = 0 ; i < 3; i++) {
264 int src = inst->src[i].reg;
265
266 if (inst->src[i].file != UNIFORM)
267 continue;
268
269 inst->src[i].reg = new_loc[src];
270
271 int sx = BRW_GET_SWZ(inst->src[i].swizzle, 0) + new_chan[src];
272 int sy = BRW_GET_SWZ(inst->src[i].swizzle, 1) + new_chan[src];
273 int sz = BRW_GET_SWZ(inst->src[i].swizzle, 2) + new_chan[src];
274 int sw = BRW_GET_SWZ(inst->src[i].swizzle, 3) + new_chan[src];
275 inst->src[i].swizzle = BRW_SWIZZLE4(sx, sy, sz, sw);
276 }
277 }
278 }
279
280 } /* namespace brw */