Merge branch 'mesa_7_7_branch'
[mesa.git] / src / gallium / drivers / i965 / brw_wm_pass2.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_debug.h"
34 #include "brw_wm.h"
35
36
/* Debug switches.  Setting either one to a non-zero value forces the
 * corresponding spill path to be taken, so that spilling can be
 * exercised with known-good shaders instead of purpose-built tests.
 *
 *   TEST_PAYLOAD_SPILLS - spill every preallocated payload value.
 *   TEST_DST_SPILLS     - spill every instruction destination.
 */
#define TEST_PAYLOAD_SPILLS 0
#define TEST_DST_SPILLS 0

/* Defined further down; declared early because prealloc_reg() calls it. */
static void spill_value(struct brw_wm_compile *c,
                        struct brw_wm_value *value);
46
47 static void prealloc_reg(struct brw_wm_compile *c,
48 struct brw_wm_value *value,
49 GLuint reg)
50 {
51 if (value->lastuse) {
52 /* Set nextuse to zero, it will be corrected by
53 * update_register_usage().
54 */
55 c->pass2_grf[reg].value = value;
56 c->pass2_grf[reg].nextuse = 0;
57
58 value->resident = &c->pass2_grf[reg];
59 value->hw_reg = brw_vec8_grf(reg*2, 0);
60
61 if (TEST_PAYLOAD_SPILLS)
62 spill_value(c, value);
63 }
64 }
65
66
67 /* Initialize all the register values. Do the initial setup
68 * calculations for interpolants.
69 */
70 static void init_registers( struct brw_wm_compile *c )
71 {
72 GLuint reg = 0;
73 GLuint j;
74
75 for (j = 0; j < c->grf_limit; j++)
76 c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
77
78 /* Pre-allocate incoming payload regs:
79 */
80 for (j = 0; j < c->key.nr_depth_regs; j++)
81 prealloc_reg(c, &c->payload.depth[j], reg++);
82
83 for (j = 0; j < c->nr_creg; j++)
84 prealloc_reg(c, &c->creg[j], reg++);
85
86 reg++; /* XXX: skip over position output */
87
88 /* XXX: currently just hope the VS outputs line up with FS inputs:
89 */
90 for (j = 0; j < c->key.nr_inputs; j++)
91 prealloc_reg(c, &c->payload.input_interp[j], reg++);
92
93 c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
94 c->prog_data.urb_read_length = (c->key.nr_inputs + 1) * 2;
95 c->prog_data.curb_read_length = c->nr_creg * 2;
96
97 /* Note this allocation:
98 */
99 c->max_wm_grf = reg * 2;
100 }
101
102
103 /* Update the nextuse value for each register in our file.
104 */
105 static void update_register_usage(struct brw_wm_compile *c,
106 GLuint thisinsn)
107 {
108 GLuint i;
109
110 for (i = 1; i < c->grf_limit; i++) {
111 struct brw_wm_grf *grf = &c->pass2_grf[i];
112
113 /* Only search those which can change:
114 */
115 if (grf->nextuse < thisinsn) {
116 const struct brw_wm_ref *ref = grf->value->lastuse;
117
118 /* Has last use of value been passed?
119 */
120 if (ref->insn < thisinsn) {
121 grf->value->resident = 0;
122 grf->value = 0;
123 grf->nextuse = BRW_WM_MAX_INSN;
124 }
125 else {
126 /* Else loop through chain to update:
127 */
128 while (ref->prevuse && ref->prevuse->insn >= thisinsn)
129 ref = ref->prevuse;
130
131 grf->nextuse = ref->insn;
132 }
133 }
134 }
135 }
136
137
138 static void spill_value(struct brw_wm_compile *c,
139 struct brw_wm_value *value)
140 {
141 /* Allocate a spill slot. Note that allocations start from 0x40 -
142 * the first slot is reserved to mean "undef" in brw_wm_emit.c
143 */
144 if (!value->spill_slot) {
145 c->last_scratch += 0x40;
146 value->spill_slot = c->last_scratch;
147 }
148
149 /* The spill will be done in brw_wm_emit.c immediately after the
150 * value is calculated, so we can just take this reg without any
151 * further work.
152 */
153 value->resident->value = NULL;
154 value->resident->nextuse = BRW_WM_MAX_INSN;
155 value->resident = NULL;
156 }
157
158
159
160 /* Search for contiguous region with the most distant nearest
161 * member. Free regs count as very distant.
162 *
163 * TODO: implement spill-to-reg so that we can rearrange discontigous
164 * free regs and then spill the oldest non-free regs in sequence.
165 * This would mean inserting instructions in this pass.
166 */
167 static GLuint search_contiguous_regs(struct brw_wm_compile *c,
168 GLuint nr,
169 GLuint thisinsn)
170 {
171 struct brw_wm_grf *grf = c->pass2_grf;
172 GLuint furthest = 0;
173 GLuint reg = 0;
174 GLuint i, j;
175
176 /* Start search at 1: r0 is special and can't be used or spilled.
177 */
178 for (i = 1; i < c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) {
179 GLuint group_nextuse = BRW_WM_MAX_INSN;
180
181 for (j = 0; j < nr; j++) {
182 if (grf[i+j].nextuse < group_nextuse)
183 group_nextuse = grf[i+j].nextuse;
184 }
185
186 if (group_nextuse > furthest) {
187 furthest = group_nextuse;
188 reg = i;
189 }
190 }
191
192 assert(furthest != thisinsn);
193
194 /* Any non-empty regs will need to be spilled:
195 */
196 for (j = 0; j < nr; j++)
197 if (grf[reg+j].value)
198 spill_value(c, grf[reg+j].value);
199
200 return reg;
201 }
202
203
204 static void alloc_contiguous_dest(struct brw_wm_compile *c,
205 struct brw_wm_value *dst[],
206 GLuint nr,
207 GLuint thisinsn)
208 {
209 GLuint reg = search_contiguous_regs(c, nr, thisinsn);
210 GLuint i;
211
212 for (i = 0; i < nr; i++) {
213 if (!dst[i]) {
214 /* Need to grab a dummy value in TEX case. Don't introduce
215 * it into the tracking scheme.
216 */
217 dst[i] = &c->vreg[c->nr_vreg++];
218 }
219 else {
220 assert(!dst[i]->resident);
221 assert(c->pass2_grf[reg+i].nextuse != thisinsn);
222
223 c->pass2_grf[reg+i].value = dst[i];
224 c->pass2_grf[reg+i].nextuse = thisinsn;
225
226 dst[i]->resident = &c->pass2_grf[reg+i];
227 }
228
229 dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0);
230 }
231
232 if ((reg+nr)*2 > c->max_wm_grf)
233 c->max_wm_grf = (reg+nr) * 2;
234 }
235
236
237 static void load_args(struct brw_wm_compile *c,
238 struct brw_wm_instruction *inst)
239 {
240 GLuint thisinsn = inst - c->instruction;
241 GLuint i,j;
242
243 for (i = 0; i < 3; i++) {
244 for (j = 0; j < 4; j++) {
245 struct brw_wm_ref *ref = inst->src[i][j];
246
247 if (ref) {
248 if (!ref->value->resident) {
249 /* Need to bring the value in from scratch space. The code for
250 * this will be done in brw_wm_emit.c, here we just do the
251 * register allocation and mark the ref as requiring a fill.
252 */
253 GLuint reg = search_contiguous_regs(c, 1, thisinsn);
254
255 c->pass2_grf[reg].value = ref->value;
256 c->pass2_grf[reg].nextuse = thisinsn;
257
258 ref->value->resident = &c->pass2_grf[reg];
259
260 /* Note that a fill is required:
261 */
262 ref->unspill_reg = reg*2;
263 }
264
265 /* Adjust the hw_reg to point at the value's current location:
266 */
267 assert(ref->value == ref->value->resident->value);
268 ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2;
269 }
270 }
271 }
272 }
273
274
275
276 /* Step 3: Work forwards once again. Perform register allocations,
277 * taking into account instructions like TEX which require contiguous
278 * result registers. Where necessary spill registers to scratch space
279 * and reload later.
280 */
281 void brw_wm_pass2( struct brw_wm_compile *c )
282 {
283 GLuint insn;
284 GLuint i;
285
286 init_registers(c);
287
288 for (insn = 0; insn < c->nr_insns; insn++) {
289 struct brw_wm_instruction *inst = &c->instruction[insn];
290
291 /* Update registers' nextuse values:
292 */
293 update_register_usage(c, insn);
294
295 /* May need to unspill some args.
296 */
297 load_args(c, inst);
298
299 /* Allocate registers to hold results:
300 */
301 switch (inst->opcode) {
302 case TGSI_OPCODE_TEX:
303 case TGSI_OPCODE_TXB:
304 case TGSI_OPCODE_TXP:
305 alloc_contiguous_dest(c, inst->dst, 4, insn);
306 break;
307
308 default:
309 for (i = 0; i < 4; i++) {
310 if (inst->writemask & (1<<i)) {
311 assert(inst->dst[i]);
312 alloc_contiguous_dest(c, &inst->dst[i], 1, insn);
313 }
314 }
315 break;
316 }
317
318 if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) {
319 for (i = 0; i < 4; i++)
320 if (inst->dst[i])
321 spill_value(c, inst->dst[i]);
322 }
323 }
324
325 if (BRW_DEBUG & DEBUG_WM) {
326 brw_wm_print_program(c, "pass2");
327 }
328
329 c->state = PASS2_DONE;
330
331 if (BRW_DEBUG & DEBUG_WM) {
332 brw_wm_print_program(c, "pass2/done");
333 }
334 }