Merge commit 'origin/mesa_7_7_branch'
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_pass0.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "brw_context.h"
34 #include "brw_wm.h"
35 #include "shader/prog_parameter.h"
36
37
38
39 /***********************************************************************
40 */
41
42 static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
43 {
44 assert(c->nr_refs < BRW_WM_MAX_REF);
45 memset(&c->refs[c->nr_refs], 0, sizeof(*c->refs));
46 return &c->refs[c->nr_refs++];
47 }
48
49 static struct brw_wm_value *get_value( struct brw_wm_compile *c)
50 {
51 assert(c->nr_refs < BRW_WM_MAX_VREG);
52 memset(&c->vreg[c->nr_vreg], 0, sizeof(*c->vreg));
53 return &c->vreg[c->nr_vreg++];
54 }
55
56 /** return pointer to a newly allocated instruction */
57 static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
58 {
59 assert(c->nr_insns < BRW_WM_MAX_INSN);
60 memset(&c->instruction[c->nr_insns], 0, sizeof(*c->instruction));
61 return &c->instruction[c->nr_insns++];
62 }
63
64 /***********************************************************************
65 */
66
67 /** Init the "undef" register */
68 static void pass0_init_undef( struct brw_wm_compile *c)
69 {
70 struct brw_wm_ref *ref = &c->undef_ref;
71 ref->value = &c->undef_value;
72 ref->hw_reg = brw_vec8_grf(0, 0);
73 ref->insn = 0;
74 ref->prevuse = NULL;
75 }
76
77 /** Set a FP register to a value */
78 static void pass0_set_fpreg_value( struct brw_wm_compile *c,
79 GLuint file,
80 GLuint idx,
81 GLuint component,
82 struct brw_wm_value *value )
83 {
84 struct brw_wm_ref *ref = get_ref(c);
85 ref->value = value;
86 ref->hw_reg = brw_vec8_grf(0, 0);
87 ref->insn = 0;
88 ref->prevuse = NULL;
89 c->pass0_fp_reg[file][idx][component] = ref;
90 }
91
92 /** Set a FP register to a ref */
93 static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
94 GLuint file,
95 GLuint idx,
96 GLuint component,
97 const struct brw_wm_ref *src_ref )
98 {
99 c->pass0_fp_reg[file][idx][component] = src_ref;
100 }
101
102 static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
103 const GLfloat *param_ptr )
104 {
105 GLuint i = c->prog_data.nr_params++;
106
107 if (i >= BRW_WM_MAX_PARAM) {
108 _mesa_printf("%s: out of params\n", __FUNCTION__);
109 c->prog_data.error = 1;
110 return NULL;
111 }
112 else {
113 struct brw_wm_ref *ref = get_ref(c);
114
115 c->prog_data.param[i] = param_ptr;
116 c->nr_creg = (i+16)/16;
117
118 /* Push the offsets into hw_reg. These will be added to the
119 * real register numbers once one is allocated in pass2.
120 */
121 ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8);
122 ref->value = &c->creg[i/16];
123 ref->insn = 0;
124 ref->prevuse = NULL;
125
126 return ref;
127 }
128 }
129
130
131 /** Return a ref to a constant/literal value */
132 static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
133 const GLfloat *constval )
134 {
135 GLuint i;
136
137 /* Search for an existing const value matching the request:
138 */
139 for (i = 0; i < c->nr_constrefs; i++) {
140 if (c->constref[i].constval == *constval)
141 return c->constref[i].ref;
142 }
143
144 /* Else try to add a new one:
145 */
146 if (c->nr_constrefs < BRW_WM_MAX_CONST) {
147 GLuint i = c->nr_constrefs++;
148
149 /* A constant is a special type of parameter:
150 */
151 c->constref[i].constval = *constval;
152 c->constref[i].ref = get_param_ref(c, constval);
153
154 return c->constref[i].ref;
155 }
156 else {
157 _mesa_printf("%s: out of constrefs\n", __FUNCTION__);
158 c->prog_data.error = 1;
159 return NULL;
160 }
161 }
162
163
164 /* Lookup our internal registers
165 */
166 static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
167 GLuint file,
168 GLuint idx,
169 GLuint component )
170 {
171 const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component];
172
173 if (!ref) {
174 switch (file) {
175 case PROGRAM_INPUT:
176 case PROGRAM_PAYLOAD:
177 case PROGRAM_TEMPORARY:
178 case PROGRAM_OUTPUT:
179 case PROGRAM_VARYING:
180 break;
181
182 case PROGRAM_LOCAL_PARAM:
183 ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]);
184 break;
185
186 case PROGRAM_ENV_PARAM:
187 ref = get_param_ref(c, &c->env_param[idx][component]);
188 break;
189
190 case PROGRAM_STATE_VAR:
191 case PROGRAM_UNIFORM:
192 case PROGRAM_CONSTANT:
193 case PROGRAM_NAMED_PARAM: {
194 struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
195
196 /* There's something really hokey about parameters parsed in
197 * arb programs - they all end up in here, whether they be
198 * state values, parameters or constants. This duplicates the
199 * structure above & also seems to subvert the limits set for
200 * each type of constant/param.
201 */
202 switch (plist->Parameters[idx].Type) {
203 case PROGRAM_NAMED_PARAM:
204 case PROGRAM_CONSTANT:
205 /* These are invarient:
206 */
207 ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
208 break;
209
210 case PROGRAM_STATE_VAR:
211 case PROGRAM_UNIFORM:
212 /* These may change from run to run:
213 */
214 ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
215 break;
216
217 default:
218 assert(0);
219 break;
220 }
221 break;
222 }
223
224 default:
225 assert(0);
226 break;
227 }
228
229 c->pass0_fp_reg[file][idx][component] = ref;
230 }
231
232 if (!ref)
233 ref = &c->undef_ref;
234
235 return ref;
236 }
237
238
239
240 /***********************************************************************
241 * Straight translation to internal instruction format
242 */
243
244 static void pass0_set_dst( struct brw_wm_compile *c,
245 struct brw_wm_instruction *out,
246 const struct prog_instruction *inst,
247 GLuint writemask )
248 {
249 const struct prog_dst_register *dst = &inst->DstReg;
250 GLuint i;
251
252 for (i = 0; i < 4; i++) {
253 if (writemask & (1<<i)) {
254 out->dst[i] = get_value(c);
255 pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]);
256 }
257 }
258
259 out->writemask = writemask;
260 }
261
262
263 static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
264 struct prog_src_register src,
265 GLuint i )
266 {
267 GLuint component = GET_SWZ(src.Swizzle,i);
268 const struct brw_wm_ref *src_ref;
269 static const GLfloat const_zero = 0.0;
270 static const GLfloat const_one = 1.0;
271
272 if (component == SWIZZLE_ZERO)
273 src_ref = get_const_ref(c, &const_zero);
274 else if (component == SWIZZLE_ONE)
275 src_ref = get_const_ref(c, &const_one);
276 else
277 src_ref = pass0_get_reg(c, src.File, src.Index, component);
278
279 return src_ref;
280 }
281
282
283 static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
284 struct prog_src_register src,
285 GLuint i,
286 struct brw_wm_instruction *insn)
287 {
288 const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i);
289 struct brw_wm_ref *newref = get_ref(c);
290
291 newref->value = ref->value;
292 newref->hw_reg = ref->hw_reg;
293
294 if (insn) {
295 newref->insn = insn - c->instruction;
296 newref->prevuse = newref->value->lastuse;
297 newref->value->lastuse = newref;
298 }
299
300 if (src.Negate & (1 << i))
301 newref->hw_reg.negate ^= 1;
302
303 if (src.Abs) {
304 newref->hw_reg.negate = 0;
305 newref->hw_reg.abs = 1;
306 }
307
308 return newref;
309 }
310
311
312 static void
313 translate_insn(struct brw_wm_compile *c,
314 const struct prog_instruction *inst)
315 {
316 struct brw_wm_instruction *out = get_instruction(c);
317 GLuint writemask = inst->DstReg.WriteMask;
318 GLuint nr_args = brw_wm_nr_args(inst->Opcode);
319 GLuint i, j;
320
321 /* Copy some data out of the instruction
322 */
323 out->opcode = inst->Opcode;
324 out->saturate = (inst->SaturateMode != SATURATE_OFF);
325 out->tex_unit = inst->TexSrcUnit;
326 out->tex_idx = inst->TexSrcTarget;
327 out->tex_shadow = inst->TexShadow;
328 out->eot = inst->Aux & INST_AUX_EOT;
329 out->target = INST_AUX_GET_TARGET(inst->Aux);
330
331 /* Args:
332 */
333 for (i = 0; i < nr_args; i++) {
334 for (j = 0; j < 4; j++) {
335 out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out);
336 }
337 }
338
339 /* Dst:
340 */
341 pass0_set_dst(c, out, inst, writemask);
342 }
343
344
345
346 /***********************************************************************
347 * Optimize moves and swizzles away:
348 */
349 static void pass0_precalc_mov( struct brw_wm_compile *c,
350 const struct prog_instruction *inst )
351 {
352 const struct prog_dst_register *dst = &inst->DstReg;
353 GLuint writemask = inst->DstReg.WriteMask;
354 struct brw_wm_ref *refs[4];
355 GLuint i;
356
357 /* Get the effect of a MOV by manipulating our register table:
358 * First get all refs, then assign refs. This ensures that "in-place"
359 * swizzles such as:
360 * MOV t, t.xxyx
361 * are handled correctly. Previously, these two steps were done in
362 * one loop and the above case was incorrectly handled.
363 */
364 for (i = 0; i < 4; i++) {
365 refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL);
366 }
367 for (i = 0; i < 4; i++) {
368 if (writemask & (1 << i)) {
369 pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]);
370 }
371 }
372 }
373
374
375 /* Initialize payload "registers".
376 */
377 static void pass0_init_payload( struct brw_wm_compile *c )
378 {
379 GLuint i;
380
381 for (i = 0; i < 4; i++) {
382 GLuint j = i >= c->key.nr_depth_regs ? 0 : i;
383 pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i,
384 &c->payload.depth[j] );
385 }
386
387 #if 0
388 /* This seems to be an alternative to the INTERP_WPOS stuff I do
389 * elsewhere:
390 */
391 if (c->key.source_depth_reg)
392 pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2,
393 &c->payload.depth[c->key.source_depth_reg/2]);
394 #endif
395
396 for (i = 0; i < FRAG_ATTRIB_MAX; i++)
397 pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0,
398 &c->payload.input_interp[i] );
399 }
400
401
402 /***********************************************************************
403 * PASS 0
404 *
405 * Work forwards to give each calculated value a unique number. Where
406 * an instruction produces duplicate values (eg DP3), all are given
407 * the same number.
408 *
409 * Translate away swizzling and eliminate non-saturating moves.
410 */
411 void brw_wm_pass0( struct brw_wm_compile *c )
412 {
413 GLuint insn;
414
415 c->nr_vreg = 0;
416 c->nr_insns = 0;
417
418 pass0_init_undef(c);
419 pass0_init_payload(c);
420
421 for (insn = 0; insn < c->nr_fp_insns; insn++) {
422 const struct prog_instruction *inst = &c->prog_instructions[insn];
423
424 /* Optimize away moves, otherwise emit translated instruction:
425 */
426 switch (inst->Opcode) {
427 case OPCODE_MOV:
428 case OPCODE_SWZ:
429 if (!inst->SaturateMode) {
430 pass0_precalc_mov(c, inst);
431 }
432 else {
433 translate_insn(c, inst);
434 }
435 break;
436 default:
437 translate_insn(c, inst);
438 break;
439 }
440 }
441
442 if (INTEL_DEBUG & DEBUG_WM) {
443 brw_wm_print_program(c, "pass0");
444 }
445 }