mesa: added "main/" prefix to includes, remove some -I paths from Makefile.template
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
/* Index of the first temporary register handed out by get_temp(); the
 * internal temporaries are numbered upwards from here.
 */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Component indices used when building swizzles.  These are enum
 * constants rather than single-letter macros so that they cannot
 * textually rewrite unrelated identifiers that happen to be spelled
 * X, Y, Z or W.
 */
enum {
   X = 0,
   Y = 1,
   Z = 2,
   W = 3
};
51
52
/* Printable names of the WM-specific pseudo-opcodes.  print_insns()
 * indexes this table with (Opcode - MAX_OPCODE).
 *
 * NOTE(review): the order presumably has to match the declaration order
 * of the WM_* opcodes - confirm against brw_wm.h when adding entries.
 */
static const char *const wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE"
};
63
64 #if 0
65 static const char *wm_file_strings[] = {
66 "PAYLOAD"
67 };
68 #endif
69
70
71 /***********************************************************************
72 * Source regs
73 */
74
75 static struct prog_src_register src_reg(GLuint file, GLuint idx)
76 {
77 struct prog_src_register reg;
78 reg.File = file;
79 reg.Index = idx;
80 reg.Swizzle = SWIZZLE_NOOP;
81 reg.RelAddr = 0;
82 reg.NegateBase = 0;
83 reg.Abs = 0;
84 reg.NegateAbs = 0;
85 return reg;
86 }
87
88 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89 {
90 return src_reg(dst.File, dst.Index);
91 }
92
93 static struct prog_src_register src_undef( void )
94 {
95 return src_reg(PROGRAM_UNDEFINED, 0);
96 }
97
98 static GLboolean src_is_undef(struct prog_src_register src)
99 {
100 return src.File == PROGRAM_UNDEFINED;
101 }
102
103 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104 {
105 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106 return reg;
107 }
108
109 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110 {
111 return src_swizzle(reg, x, x, x, x);
112 }
113
114
115 /***********************************************************************
116 * Dest regs
117 */
118
119 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
120 {
121 struct prog_dst_register reg;
122 reg.File = file;
123 reg.Index = idx;
124 reg.WriteMask = WRITEMASK_XYZW;
125 reg.CondMask = 0;
126 reg.CondSwizzle = 0;
127 reg.pad = 0;
128 reg.CondSrc = 0;
129 return reg;
130 }
131
132 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
133 {
134 reg.WriteMask &= mask;
135 return reg;
136 }
137
138 static struct prog_dst_register dst_undef( void )
139 {
140 return dst_reg(PROGRAM_UNDEFINED, 0);
141 }
142
143
144
145 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
146 {
147 int bit = _mesa_ffs( ~c->fp_temp );
148
149 if (!bit) {
150 _mesa_printf("%s: out of temporaries\n", __FILE__);
151 exit(1);
152 }
153
154 c->fp_temp |= 1<<(bit-1);
155 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
156 }
157
158
159 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
160 {
161 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
162 }
163
164
165 /***********************************************************************
166 * Instructions
167 */
168
169 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
170 {
171 return &c->prog_instructions[c->nr_fp_insns++];
172 }
173
174 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
175 const struct prog_instruction *inst0)
176 {
177 struct prog_instruction *inst = get_fp_inst(c);
178 *inst = *inst0;
179 inst->Data = (void *)inst0;
180 return inst;
181 }
182
183 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
184 GLuint op,
185 struct prog_dst_register dest,
186 GLuint saturate,
187 GLuint tex_src_unit,
188 GLuint tex_src_target,
189 struct prog_src_register src0,
190 struct prog_src_register src1,
191 struct prog_src_register src2 )
192 {
193 struct prog_instruction *inst = get_fp_inst(c);
194
195 memset(inst, 0, sizeof(*inst));
196
197 inst->Opcode = op;
198 inst->DstReg = dest;
199 inst->SaturateMode = saturate;
200 inst->TexSrcUnit = tex_src_unit;
201 inst->TexSrcTarget = tex_src_target;
202 inst->SrcReg[0] = src0;
203 inst->SrcReg[1] = src1;
204 inst->SrcReg[2] = src2;
205 return inst;
206 }
207
208
209
210
211 /***********************************************************************
212 * Special instructions for interpolation and other tasks
213 */
214
215 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
216 {
217 if (src_is_undef(c->pixel_xy)) {
218 struct prog_dst_register pixel_xy = get_temp(c);
219 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
220
221
222 /* Emit the out calculations, and hold onto the results. Use
223 * two instructions as a temporary is required.
224 */
225 /* pixel_xy.xy = PIXELXY payload[0];
226 */
227 emit_op(c,
228 WM_PIXELXY,
229 dst_mask(pixel_xy, WRITEMASK_XY),
230 0, 0, 0,
231 payload_r0_depth,
232 src_undef(),
233 src_undef());
234
235 c->pixel_xy = src_reg_from_dst(pixel_xy);
236 }
237
238 return c->pixel_xy;
239 }
240
241 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
242 {
243 if (src_is_undef(c->delta_xy)) {
244 struct prog_dst_register delta_xy = get_temp(c);
245 struct prog_src_register pixel_xy = get_pixel_xy(c);
246 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
247
248 /* deltas.xy = DELTAXY pixel_xy, payload[0]
249 */
250 emit_op(c,
251 WM_DELTAXY,
252 dst_mask(delta_xy, WRITEMASK_XY),
253 0, 0, 0,
254 pixel_xy,
255 payload_r0_depth,
256 src_undef());
257
258 c->delta_xy = src_reg_from_dst(delta_xy);
259 }
260
261 return c->delta_xy;
262 }
263
264 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
265 {
266 if (src_is_undef(c->pixel_w)) {
267 struct prog_dst_register pixel_w = get_temp(c);
268 struct prog_src_register deltas = get_delta_xy(c);
269 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
270
271
272 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
273 */
274 emit_op(c,
275 WM_PIXELW,
276 dst_mask(pixel_w, WRITEMASK_W),
277 0, 0, 0,
278 interp_wpos,
279 deltas,
280 src_undef());
281
282
283 c->pixel_w = src_reg_from_dst(pixel_w);
284 }
285
286 return c->pixel_w;
287 }
288
289 static void emit_interp( struct brw_wm_compile *c,
290 GLuint idx )
291 {
292 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
293 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
294 struct prog_src_register deltas = get_delta_xy(c);
295 struct prog_src_register arg2;
296 GLuint opcode;
297
298 /* Need to use PINTERP on attributes which have been
299 * multiplied by 1/W in the SF program, and LINTERP on those
300 * which have not:
301 */
302 switch (idx) {
303 case FRAG_ATTRIB_WPOS:
304 opcode = WM_LINTERP;
305 arg2 = src_undef();
306
307 /* Have to treat wpos.xy specially:
308 */
309 emit_op(c,
310 WM_WPOSXY,
311 dst_mask(dst, WRITEMASK_XY),
312 0, 0, 0,
313 get_pixel_xy(c),
314 src_undef(),
315 src_undef());
316
317 dst = dst_mask(dst, WRITEMASK_ZW);
318
319 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
320 */
321 emit_op(c,
322 WM_LINTERP,
323 dst,
324 0, 0, 0,
325 interp,
326 deltas,
327 arg2);
328 break;
329 case FRAG_ATTRIB_COL0:
330 case FRAG_ATTRIB_COL1:
331 if (c->key.flat_shade) {
332 emit_op(c,
333 WM_CINTERP,
334 dst,
335 0, 0, 0,
336 interp,
337 src_undef(),
338 src_undef());
339 }
340 else {
341 emit_op(c,
342 WM_LINTERP,
343 dst,
344 0, 0, 0,
345 interp,
346 deltas,
347 src_undef());
348 }
349 break;
350 default:
351 emit_op(c,
352 WM_PINTERP,
353 dst,
354 0, 0, 0,
355 interp,
356 deltas,
357 get_pixel_w(c));
358 break;
359 }
360
361 c->fp_interp_emitted |= 1<<idx;
362 }
363
364 static void emit_ddx( struct brw_wm_compile *c,
365 const struct prog_instruction *inst )
366 {
367 GLuint idx = inst->SrcReg[0].Index;
368 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
369
370 c->fp_deriv_emitted |= 1<<idx;
371 emit_op(c,
372 OPCODE_DDX,
373 inst->DstReg,
374 0, 0, 0,
375 interp,
376 get_pixel_w(c),
377 src_undef());
378 }
379
380 static void emit_ddy( struct brw_wm_compile *c,
381 const struct prog_instruction *inst )
382 {
383 GLuint idx = inst->SrcReg[0].Index;
384 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
385
386 c->fp_deriv_emitted |= 1<<idx;
387 emit_op(c,
388 OPCODE_DDY,
389 inst->DstReg,
390 0, 0, 0,
391 interp,
392 get_pixel_w(c),
393 src_undef());
394 }
395
396 /***********************************************************************
397 * Hacks to extend the program parameter and constant lists.
398 */
399
400 /* Add the fog parameters to the parameter list of the original
401 * program, rather than creating a new list. Doesn't really do any
402 * harm and it's not as if the parameter handling isn't a big hack
403 * anyway.
404 */
405 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
406 GLint s0,
407 GLint s1,
408 GLint s2,
409 GLint s3,
410 GLint s4)
411 {
412 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
413 gl_state_index tokens[STATE_LENGTH];
414 GLuint idx;
415 tokens[0] = s0;
416 tokens[1] = s1;
417 tokens[2] = s2;
418 tokens[3] = s3;
419 tokens[4] = s4;
420
421 for (idx = 0; idx < paramList->NumParameters; idx++) {
422 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
423 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
424 return src_reg(PROGRAM_STATE_VAR, idx);
425 }
426
427 idx = _mesa_add_state_reference( paramList, tokens );
428
429 /* Recalculate state dependency:
430 */
431 c->fp->param_state = paramList->StateFlags;
432
433 return src_reg(PROGRAM_STATE_VAR, idx);
434 }
435
436
437 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
438 GLfloat s0,
439 GLfloat s1,
440 GLfloat s2,
441 GLfloat s3)
442 {
443 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
444 GLfloat values[4];
445 GLuint idx;
446 GLuint swizzle;
447
448 values[0] = s0;
449 values[1] = s1;
450 values[2] = s2;
451 values[3] = s3;
452
453 /* Have to search, otherwise multiple compilations will each grow
454 * the parameter list.
455 */
456 for (idx = 0; idx < paramList->NumParameters; idx++) {
457 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
458 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
459
460 /* XXX: this mimics the mesa bug which puts all constants and
461 * parameters into the "PROGRAM_STATE_VAR" category:
462 */
463 return src_reg(PROGRAM_STATE_VAR, idx);
464 }
465
466 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
467 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
468 return src_reg(PROGRAM_STATE_VAR, idx);
469 }
470
471
472
473 /***********************************************************************
474 * Expand various instructions here to simpler forms.
475 */
476 static void precalc_dst( struct brw_wm_compile *c,
477 const struct prog_instruction *inst )
478 {
479 struct prog_src_register src0 = inst->SrcReg[0];
480 struct prog_src_register src1 = inst->SrcReg[1];
481 struct prog_dst_register dst = inst->DstReg;
482
483 if (dst.WriteMask & WRITEMASK_Y) {
484 /* dst.y = mul src0.y, src1.y
485 */
486 emit_op(c,
487 OPCODE_MUL,
488 dst_mask(dst, WRITEMASK_Y),
489 inst->SaturateMode, 0, 0,
490 src0,
491 src1,
492 src_undef());
493 }
494
495
496 if (dst.WriteMask & WRITEMASK_XZ) {
497 struct prog_instruction *swz;
498 GLuint z = GET_SWZ(src0.Swizzle, Z);
499
500 /* dst.xz = swz src0.1zzz
501 */
502 swz = emit_op(c,
503 OPCODE_SWZ,
504 dst_mask(dst, WRITEMASK_XZ),
505 inst->SaturateMode, 0, 0,
506 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
507 src_undef(),
508 src_undef());
509 /* Avoid letting negation flag of src0 affect our 1 constant. */
510 swz->SrcReg[0].NegateBase &= ~NEGATE_X;
511 }
512 if (dst.WriteMask & WRITEMASK_W) {
513 /* dst.w = mov src1.w
514 */
515 emit_op(c,
516 OPCODE_MOV,
517 dst_mask(dst, WRITEMASK_W),
518 inst->SaturateMode, 0, 0,
519 src1,
520 src_undef(),
521 src_undef());
522 }
523 }
524
525
526 static void precalc_lit( struct brw_wm_compile *c,
527 const struct prog_instruction *inst )
528 {
529 struct prog_src_register src0 = inst->SrcReg[0];
530 struct prog_dst_register dst = inst->DstReg;
531
532 if (dst.WriteMask & WRITEMASK_XW) {
533 struct prog_instruction *swz;
534
535 /* dst.xw = swz src0.1111
536 */
537 swz = emit_op(c,
538 OPCODE_SWZ,
539 dst_mask(dst, WRITEMASK_XW),
540 0, 0, 0,
541 src_swizzle1(src0, SWIZZLE_ONE),
542 src_undef(),
543 src_undef());
544 /* Avoid letting the negation flag of src0 affect our 1 constant. */
545 swz->SrcReg[0].NegateBase = 0;
546 }
547
548
549 if (dst.WriteMask & WRITEMASK_YZ) {
550 emit_op(c,
551 OPCODE_LIT,
552 dst_mask(dst, WRITEMASK_YZ),
553 inst->SaturateMode, 0, 0,
554 src0,
555 src_undef(),
556 src_undef());
557 }
558 }
559
560 static void precalc_tex( struct brw_wm_compile *c,
561 const struct prog_instruction *inst )
562 {
563 struct prog_src_register coord;
564 struct prog_dst_register tmpcoord;
565 GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
566
567 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
568 struct prog_instruction *out;
569 struct prog_dst_register tmp0 = get_temp(c);
570 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
571 struct prog_dst_register tmp1 = get_temp(c);
572 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
573 struct prog_src_register src0 = inst->SrcReg[0];
574
575 tmpcoord = get_temp(c);
576 coord = src_reg_from_dst(tmpcoord);
577
578 out = emit_op(c, OPCODE_MOV,
579 tmpcoord,
580 0, 0, 0,
581 src0,
582 src_undef(),
583 src_undef());
584 out->SrcReg[0].NegateBase = 0;
585 out->SrcReg[0].Abs = 1;
586
587 emit_op(c, OPCODE_MAX,
588 tmp0,
589 0, 0, 0,
590 src_swizzle1(coord, X),
591 src_swizzle1(coord, Y),
592 src_undef());
593
594 emit_op(c, OPCODE_MAX,
595 tmp1,
596 0, 0, 0,
597 tmp0src,
598 src_swizzle1(coord, Z),
599 src_undef());
600
601 emit_op(c, OPCODE_RCP,
602 tmp0,
603 0, 0, 0,
604 tmp1src,
605 src_undef(),
606 src_undef());
607
608 emit_op(c, OPCODE_MUL,
609 tmpcoord,
610 0, 0, 0,
611 src0,
612 tmp0src,
613 src_undef());
614
615 release_temp(c, tmp0);
616 release_temp(c, tmp1);
617 } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
618 struct prog_src_register scale =
619 search_or_add_param5( c,
620 STATE_INTERNAL,
621 STATE_TEXRECT_SCALE,
622 unit,
623 0,0 );
624
625 tmpcoord = get_temp(c);
626
627 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
628 */
629 emit_op(c,
630 OPCODE_MUL,
631 tmpcoord,
632 0, 0, 0,
633 inst->SrcReg[0],
634 scale,
635 src_undef());
636
637 coord = src_reg_from_dst(tmpcoord);
638 }
639 else {
640 coord = inst->SrcReg[0];
641 }
642
643 /* Need to emit YUV texture conversions by hand. Probably need to
644 * do this here - the alternative is in brw_wm_emit.c, but the
645 * conversion requires allocating a temporary variable which we
646 * don't have the facility to do that late in the compilation.
647 */
648 if (!(c->key.yuvtex_mask & (1<<unit))) {
649 emit_op(c,
650 OPCODE_TEX,
651 inst->DstReg,
652 inst->SaturateMode,
653 unit,
654 inst->TexSrcTarget,
655 coord,
656 src_undef(),
657 src_undef());
658 }
659 else {
660 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
661
662 /*
663 CONST C0 = { -.5, -.0625, -.5, 1.164 }
664 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
665 UYV = TEX ...
666 UYV.xyz = ADD UYV, C0
667 UYV.y = MUL UYV.y, C0.w
668 if (UV swaped)
669 RGB.xyz = MAD UYV.zzx, C1, UYV.y
670 else
671 RGB.xyz = MAD UYV.xxz, C1, UYV.y
672 RGB.y = MAD UYV.z, C1.w, RGB.y
673 */
674 struct prog_dst_register dst = inst->DstReg;
675 struct prog_dst_register tmp = get_temp(c);
676 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
677 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
678 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
679
680 /* tmp = TEX ...
681 */
682 emit_op(c,
683 OPCODE_TEX,
684 tmp,
685 inst->SaturateMode,
686 unit,
687 inst->TexSrcTarget,
688 coord,
689 src_undef(),
690 src_undef());
691
692 /* tmp.xyz = ADD TMP, C0
693 */
694 emit_op(c,
695 OPCODE_ADD,
696 dst_mask(tmp, WRITEMASK_XYZ),
697 0, 0, 0,
698 tmpsrc,
699 C0,
700 src_undef());
701
702 /* YUV.y = MUL YUV.y, C0.w
703 */
704
705 emit_op(c,
706 OPCODE_MUL,
707 dst_mask(tmp, WRITEMASK_Y),
708 0, 0, 0,
709 tmpsrc,
710 src_swizzle1(C0, W),
711 src_undef());
712
713 /*
714 * if (UV swaped)
715 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
716 * else
717 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
718 */
719
720 emit_op(c,
721 OPCODE_MAD,
722 dst_mask(dst, WRITEMASK_XYZ),
723 0, 0, 0,
724 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
725 C1,
726 src_swizzle1(tmpsrc, Y));
727
728 /* RGB.y = MAD YUV.z, C1.w, RGB.y
729 */
730 emit_op(c,
731 OPCODE_MAD,
732 dst_mask(dst, WRITEMASK_Y),
733 0, 0, 0,
734 src_swizzle1(tmpsrc, Z),
735 src_swizzle1(C1, W),
736 src_swizzle1(src_reg_from_dst(dst), Y));
737
738 release_temp(c, tmp);
739 }
740
741 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
742 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
743 release_temp(c, tmpcoord);
744 }
745
746
747 static GLboolean projtex( struct brw_wm_compile *c,
748 const struct prog_instruction *inst )
749 {
750 struct prog_src_register src = inst->SrcReg[0];
751
752 /* Only try to detect the simplest cases. Could detect (later)
753 * cases where we are trying to emit code like RCP {1.0}, MUL x,
754 * {1.0}, and so on.
755 *
756 * More complex cases than this typically only arise from
757 * user-provided fragment programs anyway:
758 */
759 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
760 return 0; /* ut2004 gun rendering !?! */
761 else if (src.File == PROGRAM_INPUT &&
762 GET_SWZ(src.Swizzle, W) == W &&
763 (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
764 return 0;
765 else
766 return 1;
767 }
768
769
770 static void precalc_txp( struct brw_wm_compile *c,
771 const struct prog_instruction *inst )
772 {
773 struct prog_src_register src0 = inst->SrcReg[0];
774
775 if (projtex(c, inst)) {
776 struct prog_dst_register tmp = get_temp(c);
777 struct prog_instruction tmp_inst;
778
779 /* tmp0.w = RCP inst.arg[0][3]
780 */
781 emit_op(c,
782 OPCODE_RCP,
783 dst_mask(tmp, WRITEMASK_W),
784 0, 0, 0,
785 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
786 src_undef(),
787 src_undef());
788
789 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
790 */
791 emit_op(c,
792 OPCODE_MUL,
793 dst_mask(tmp, WRITEMASK_XYZ),
794 0, 0, 0,
795 src0,
796 src_swizzle1(src_reg_from_dst(tmp), W),
797 src_undef());
798
799 /* dst = precalc(TEX tmp0)
800 */
801 tmp_inst = *inst;
802 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
803 precalc_tex(c, &tmp_inst);
804
805 release_temp(c, tmp);
806 }
807 else
808 {
809 /* dst = precalc(TEX src0)
810 */
811 precalc_tex(c, inst);
812 }
813 }
814
815
816
817
818
819 /***********************************************************************
820 * Add instructions to perform fog blending
821 */
822
823 static void fog_blend( struct brw_wm_compile *c,
824 struct prog_src_register fog_factor )
825 {
826 struct prog_dst_register outcolor = dst_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
827 struct prog_src_register fogcolor = search_or_add_param5( c, STATE_FOG_COLOR, 0,0,0,0 );
828
829 /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */
830
831 emit_op(c,
832 OPCODE_LRP,
833 dst_mask(outcolor, WRITEMASK_XYZ),
834 0, 0, 0,
835 fog_factor,
836 src_reg_from_dst(outcolor),
837 fogcolor);
838 }
839
840
841
842 /* This one is simple - just take the interpolated fog coordinate and
843 * use it as the fog blend factor.
844 */
845 static void fog_interpolated( struct brw_wm_compile *c )
846 {
847 struct prog_src_register fogc = src_reg(PROGRAM_INPUT, FRAG_ATTRIB_FOGC);
848
849 if (!(c->fp_interp_emitted & (1<<FRAG_ATTRIB_FOGC)))
850 emit_interp(c, FRAG_ATTRIB_FOGC);
851
852 fog_blend( c, src_swizzle1(fogc, GET_SWZ(fogc.Swizzle,X)));
853 }
854
855 static void emit_fog( struct brw_wm_compile *c )
856 {
857 if (!c->fp->program.FogOption)
858 return;
859
860 if (1)
861 fog_interpolated( c );
862 else {
863 /* TODO: per-pixel fog */
864 assert(0);
865 }
866 }
867
868 static void emit_fb_write( struct brw_wm_compile *c )
869 {
870 struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
871 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
872 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
873 GLuint i;
874
875 struct prog_instruction *inst, *last_inst;
876 struct brw_context *brw = c->func.brw;
877
878 /* inst->Sampler is not used by backend,
879 use it for fb write target and eot */
880
881 if (brw->state.nr_draw_regions > 1) {
882 for (i = 0 ; i < brw->state.nr_draw_regions; i++) {
883 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
884 last_inst = inst = emit_op(c,
885 WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0,
886 outcolor, payload_r0_depth, outdepth);
887 inst->Sampler = (i<<1);
888 if (c->fp_fragcolor_emitted) {
889 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
890 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
891 0, 0, 0, outcolor, payload_r0_depth, outdepth);
892 inst->Sampler = (i<<1);
893 }
894 }
895 last_inst->Sampler |= 1; //eot
896 }else {
897 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
898 0, 0, 0, outcolor, payload_r0_depth, outdepth);
899 inst->Sampler = 1|(0<<1);
900 }
901 }
902
903
904
905
906 /***********************************************************************
907 * Emit INTERP instructions ahead of first use of each attrib.
908 */
909
910 static void validate_src_regs( struct brw_wm_compile *c,
911 const struct prog_instruction *inst )
912 {
913 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
914 GLuint i;
915
916 for (i = 0; i < nr_args; i++) {
917 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
918 GLuint idx = inst->SrcReg[i].Index;
919 if (!(c->fp_interp_emitted & (1<<idx))) {
920 emit_interp(c, idx);
921 }
922 }
923 }
924 }
925
926 static void validate_dst_regs( struct brw_wm_compile *c,
927 const struct prog_instruction *inst )
928 {
929 if (inst->DstReg.File == PROGRAM_OUTPUT) {
930 GLuint idx = inst->DstReg.Index;
931 if (idx == FRAG_RESULT_COLR)
932 c->fp_fragcolor_emitted = 1;
933 }
934 }
935
936 static void print_insns( const struct prog_instruction *insn,
937 GLuint nr )
938 {
939 GLuint i;
940 for (i = 0; i < nr; i++, insn++) {
941 _mesa_printf("%3d: ", i);
942 if (insn->Opcode < MAX_OPCODE)
943 _mesa_print_instruction(insn);
944 else if (insn->Opcode < MAX_WM_OPCODE) {
945 GLuint idx = insn->Opcode - MAX_OPCODE;
946
947 _mesa_print_alu_instruction(insn,
948 wm_opcode_strings[idx],
949 3);
950 }
951 else
952 _mesa_printf("UNKNOWN\n");
953
954 }
955 }
956
957 void brw_wm_pass_fp( struct brw_wm_compile *c )
958 {
959 struct brw_fragment_program *fp = c->fp;
960 GLuint insn;
961
962 if (INTEL_DEBUG & DEBUG_WM) {
963 _mesa_printf("\n\n\npre-fp:\n");
964 _mesa_print_program(&fp->program.Base);
965 _mesa_printf("\n");
966 }
967
968 c->pixel_xy = src_undef();
969 c->delta_xy = src_undef();
970 c->pixel_w = src_undef();
971 c->nr_fp_insns = 0;
972
973 /* Emit preamble instructions:
974 */
975
976
977 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
978 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
979 validate_src_regs(c, inst);
980 validate_dst_regs(c, inst);
981 }
982 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
983 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
984 struct prog_instruction *out;
985
986 /* Check for INPUT values, emit INTERP instructions where
987 * necessary:
988 */
989
990
991 switch (inst->Opcode) {
992 case OPCODE_SWZ:
993 out = emit_insn(c, inst);
994 out->Opcode = OPCODE_MOV;
995 break;
996
997 case OPCODE_ABS:
998 out = emit_insn(c, inst);
999 out->Opcode = OPCODE_MOV;
1000 out->SrcReg[0].NegateBase = 0;
1001 out->SrcReg[0].Abs = 1;
1002 break;
1003
1004 case OPCODE_SUB:
1005 out = emit_insn(c, inst);
1006 out->Opcode = OPCODE_ADD;
1007 out->SrcReg[1].NegateBase ^= 0xf;
1008 break;
1009
1010 case OPCODE_SCS:
1011 out = emit_insn(c, inst);
1012 /* This should probably be done in the parser.
1013 */
1014 out->DstReg.WriteMask &= WRITEMASK_XY;
1015 break;
1016
1017 case OPCODE_DST:
1018 precalc_dst(c, inst);
1019 break;
1020
1021 case OPCODE_LIT:
1022 precalc_lit(c, inst);
1023 break;
1024
1025 case OPCODE_TEX:
1026 precalc_tex(c, inst);
1027 break;
1028
1029 case OPCODE_TXP:
1030 precalc_txp(c, inst);
1031 break;
1032
1033 case OPCODE_TXB:
1034 out = emit_insn(c, inst);
1035 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1036 break;
1037
1038 case OPCODE_XPD:
1039 out = emit_insn(c, inst);
1040 /* This should probably be done in the parser.
1041 */
1042 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1043 break;
1044
1045 case OPCODE_KIL:
1046 out = emit_insn(c, inst);
1047 /* This should probably be done in the parser.
1048 */
1049 out->DstReg.WriteMask = 0;
1050 break;
1051 case OPCODE_DDX:
1052 emit_ddx(c, inst);
1053 break;
1054 case OPCODE_DDY:
1055 emit_ddy(c, inst);
1056 break;
1057 case OPCODE_END:
1058 emit_fog(c);
1059 emit_fb_write(c);
1060 break;
1061 case OPCODE_PRINT:
1062 break;
1063
1064 default:
1065 emit_insn(c, inst);
1066 break;
1067 }
1068 }
1069
1070 if (INTEL_DEBUG & DEBUG_WM) {
1071 _mesa_printf("\n\n\npass_fp:\n");
1072 print_insns( c->prog_instructions, c->nr_fp_insns );
1073 _mesa_printf("\n");
1074 }
1075 }
1076