Merge branch 'master' of git+ssh://joukj@git.freedesktop.org/git/mesa/mesa
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "glheader.h"
34 #include "macros.h"
35 #include "enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
/* Index of the first temporary handed out by get_temp(): internal
 * temporaries are numbered upwards from here (presumably so they sit
 * above the range a fragment program may declare itself).
 */
#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Vector component indices, used when building swizzles. */
enum {
   X = 0,
   Y = 1,
   Z = 2,
   W = 3
};
51
52
/* Printable names for the driver-private WM opcodes.  print_insns()
 * indexes this table with (Opcode - MAX_OPCODE), so the entries must
 * stay in the same order as the WM opcode values.  The table is never
 * modified, hence both the strings and the pointers are const.
 */
static const char *const wm_opcode_strings[] = {
   "PIXELXY",
   "DELTAXY",
   "PIXELW",
   "LINTERP",
   "PINTERP",
   "CINTERP",
   "WPOSXY",
   "FB_WRITE"
};
63
64 #if 0
65 static const char *wm_file_strings[] = {
66 "PAYLOAD"
67 };
68 #endif
69
70
71 /***********************************************************************
72 * Source regs
73 */
74
75 static struct prog_src_register src_reg(GLuint file, GLuint idx)
76 {
77 struct prog_src_register reg;
78 reg.File = file;
79 reg.Index = idx;
80 reg.Swizzle = SWIZZLE_NOOP;
81 reg.RelAddr = 0;
82 reg.NegateBase = 0;
83 reg.Abs = 0;
84 reg.NegateAbs = 0;
85 return reg;
86 }
87
88 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89 {
90 return src_reg(dst.File, dst.Index);
91 }
92
93 static struct prog_src_register src_undef( void )
94 {
95 return src_reg(PROGRAM_UNDEFINED, 0);
96 }
97
98 static GLboolean src_is_undef(struct prog_src_register src)
99 {
100 return src.File == PROGRAM_UNDEFINED;
101 }
102
103 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104 {
105 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106 return reg;
107 }
108
109 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110 {
111 return src_swizzle(reg, x, x, x, x);
112 }
113
114
115 /***********************************************************************
116 * Dest regs
117 */
118
119 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
120 {
121 struct prog_dst_register reg;
122 reg.File = file;
123 reg.Index = idx;
124 reg.WriteMask = WRITEMASK_XYZW;
125 reg.CondMask = 0;
126 reg.CondSwizzle = 0;
127 reg.pad = 0;
128 reg.CondSrc = 0;
129 return reg;
130 }
131
132 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
133 {
134 reg.WriteMask &= mask;
135 return reg;
136 }
137
138 static struct prog_dst_register dst_undef( void )
139 {
140 return dst_reg(PROGRAM_UNDEFINED, 0);
141 }
142
143
144
145 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
146 {
147 int bit = ffs( ~c->fp_temp );
148
149 if (!bit) {
150 _mesa_printf("%s: out of temporaries\n", __FILE__);
151 exit(1);
152 }
153
154 c->fp_temp |= 1<<(bit-1);
155 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
156 }
157
158
159 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
160 {
161 c->fp_temp &= ~1<<(temp.Index + 1 - FIRST_INTERNAL_TEMP);
162 }
163
164
165 /***********************************************************************
166 * Instructions
167 */
168
169 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
170 {
171 return &c->prog_instructions[c->nr_fp_insns++];
172 }
173
174 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
175 const struct prog_instruction *inst0)
176 {
177 struct prog_instruction *inst = get_fp_inst(c);
178 *inst = *inst0;
179 inst->Data = (void *)inst0;
180 return inst;
181 }
182
183 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
184 GLuint op,
185 struct prog_dst_register dest,
186 GLuint saturate,
187 GLuint tex_src_unit,
188 GLuint tex_src_target,
189 struct prog_src_register src0,
190 struct prog_src_register src1,
191 struct prog_src_register src2 )
192 {
193 struct prog_instruction *inst = get_fp_inst(c);
194
195 memset(inst, 0, sizeof(*inst));
196
197 inst->Opcode = op;
198 inst->DstReg = dest;
199 inst->SaturateMode = saturate;
200 inst->TexSrcUnit = tex_src_unit;
201 inst->TexSrcTarget = tex_src_target;
202 inst->SrcReg[0] = src0;
203 inst->SrcReg[1] = src1;
204 inst->SrcReg[2] = src2;
205 return inst;
206 }
207
208
209
210
211 /***********************************************************************
212 * Special instructions for interpolation and other tasks
213 */
214
215 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
216 {
217 if (src_is_undef(c->pixel_xy)) {
218 struct prog_dst_register pixel_xy = get_temp(c);
219 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
220
221
222 /* Emit the out calculations, and hold onto the results. Use
223 * two instructions as a temporary is required.
224 */
225 /* pixel_xy.xy = PIXELXY payload[0];
226 */
227 emit_op(c,
228 WM_PIXELXY,
229 dst_mask(pixel_xy, WRITEMASK_XY),
230 0, 0, 0,
231 payload_r0_depth,
232 src_undef(),
233 src_undef());
234
235 c->pixel_xy = src_reg_from_dst(pixel_xy);
236 }
237
238 return c->pixel_xy;
239 }
240
241 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
242 {
243 if (src_is_undef(c->delta_xy)) {
244 struct prog_dst_register delta_xy = get_temp(c);
245 struct prog_src_register pixel_xy = get_pixel_xy(c);
246 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
247
248 /* deltas.xy = DELTAXY pixel_xy, payload[0]
249 */
250 emit_op(c,
251 WM_DELTAXY,
252 dst_mask(delta_xy, WRITEMASK_XY),
253 0, 0, 0,
254 pixel_xy,
255 payload_r0_depth,
256 src_undef());
257
258 c->delta_xy = src_reg_from_dst(delta_xy);
259 }
260
261 return c->delta_xy;
262 }
263
264 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
265 {
266 if (src_is_undef(c->pixel_w)) {
267 struct prog_dst_register pixel_w = get_temp(c);
268 struct prog_src_register deltas = get_delta_xy(c);
269 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
270
271
272 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
273 */
274 emit_op(c,
275 WM_PIXELW,
276 dst_mask(pixel_w, WRITEMASK_W),
277 0, 0, 0,
278 interp_wpos,
279 deltas,
280 src_undef());
281
282
283 c->pixel_w = src_reg_from_dst(pixel_w);
284 }
285
286 return c->pixel_w;
287 }
288
289 static void emit_interp( struct brw_wm_compile *c,
290 GLuint idx )
291 {
292 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
293 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
294 struct prog_src_register deltas = get_delta_xy(c);
295 struct prog_src_register arg2;
296 GLuint opcode;
297
298 /* Need to use PINTERP on attributes which have been
299 * multiplied by 1/W in the SF program, and LINTERP on those
300 * which have not:
301 */
302 switch (idx) {
303 case FRAG_ATTRIB_WPOS:
304 opcode = WM_LINTERP;
305 arg2 = src_undef();
306
307 /* Have to treat wpos.xy specially:
308 */
309 emit_op(c,
310 WM_WPOSXY,
311 dst_mask(dst, WRITEMASK_XY),
312 0, 0, 0,
313 get_pixel_xy(c),
314 src_undef(),
315 src_undef());
316
317 dst = dst_mask(dst, WRITEMASK_ZW);
318
319 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
320 */
321 emit_op(c,
322 WM_LINTERP,
323 dst,
324 0, 0, 0,
325 interp,
326 deltas,
327 arg2);
328 break;
329 case FRAG_ATTRIB_COL0:
330 case FRAG_ATTRIB_COL1:
331 if (c->key.flat_shade) {
332 emit_op(c,
333 WM_CINTERP,
334 dst,
335 0, 0, 0,
336 interp,
337 src_undef(),
338 src_undef());
339 }
340 else {
341 emit_op(c,
342 WM_LINTERP,
343 dst,
344 0, 0, 0,
345 interp,
346 deltas,
347 src_undef());
348 }
349 break;
350 default:
351 emit_op(c,
352 WM_PINTERP,
353 dst,
354 0, 0, 0,
355 interp,
356 deltas,
357 get_pixel_w(c));
358 break;
359 }
360
361 c->fp_interp_emitted |= 1<<idx;
362 }
363
364 static void emit_ddx( struct brw_wm_compile *c,
365 const struct prog_instruction *inst )
366 {
367 GLuint idx = inst->SrcReg[0].Index;
368 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
369
370 c->fp_deriv_emitted |= 1<<idx;
371 emit_op(c,
372 OPCODE_DDX,
373 inst->DstReg,
374 0, 0, 0,
375 interp,
376 get_pixel_w(c),
377 src_undef());
378 }
379
380 static void emit_ddy( struct brw_wm_compile *c,
381 const struct prog_instruction *inst )
382 {
383 GLuint idx = inst->SrcReg[0].Index;
384 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
385
386 c->fp_deriv_emitted |= 1<<idx;
387 emit_op(c,
388 OPCODE_DDY,
389 inst->DstReg,
390 0, 0, 0,
391 interp,
392 get_pixel_w(c),
393 src_undef());
394 }
395
396 /***********************************************************************
397 * Hacks to extend the program parameter and constant lists.
398 */
399
400 /* Add the fog parameters to the parameter list of the original
401 * program, rather than creating a new list. Doesn't really do any
402 * harm and it's not as if the parameter handling isn't a big hack
403 * anyway.
404 */
405 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
406 GLint s0,
407 GLint s1,
408 GLint s2,
409 GLint s3,
410 GLint s4)
411 {
412 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
413 gl_state_index tokens[STATE_LENGTH];
414 GLuint idx;
415 tokens[0] = s0;
416 tokens[1] = s1;
417 tokens[2] = s2;
418 tokens[3] = s3;
419 tokens[4] = s4;
420
421 for (idx = 0; idx < paramList->NumParameters; idx++) {
422 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
423 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
424 return src_reg(PROGRAM_STATE_VAR, idx);
425 }
426
427 idx = _mesa_add_state_reference( paramList, tokens );
428
429 /* Recalculate state dependency:
430 */
431 c->fp->param_state = paramList->StateFlags;
432
433 return src_reg(PROGRAM_STATE_VAR, idx);
434 }
435
436
437 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
438 GLfloat s0,
439 GLfloat s1,
440 GLfloat s2,
441 GLfloat s3)
442 {
443 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
444 GLfloat values[4];
445 GLuint idx;
446 GLuint swizzle;
447
448 values[0] = s0;
449 values[1] = s1;
450 values[2] = s2;
451 values[3] = s3;
452
453 /* Have to search, otherwise multiple compilations will each grow
454 * the parameter list.
455 */
456 for (idx = 0; idx < paramList->NumParameters; idx++) {
457 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
458 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
459
460 /* XXX: this mimics the mesa bug which puts all constants and
461 * parameters into the "PROGRAM_STATE_VAR" category:
462 */
463 return src_reg(PROGRAM_STATE_VAR, idx);
464 }
465
466 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
467 /* XXX what about swizzle? */
468 return src_reg(PROGRAM_STATE_VAR, idx);
469 }
470
471
472
473 /***********************************************************************
474 * Expand various instructions here to simpler forms.
475 */
476 static void precalc_dst( struct brw_wm_compile *c,
477 const struct prog_instruction *inst )
478 {
479 struct prog_src_register src0 = inst->SrcReg[0];
480 struct prog_src_register src1 = inst->SrcReg[1];
481 struct prog_dst_register dst = inst->DstReg;
482
483 if (dst.WriteMask & WRITEMASK_Y) {
484 /* dst.y = mul src0.y, src1.y
485 */
486 emit_op(c,
487 OPCODE_MUL,
488 dst_mask(dst, WRITEMASK_Y),
489 inst->SaturateMode, 0, 0,
490 src0,
491 src1,
492 src_undef());
493 }
494
495
496 if (dst.WriteMask & WRITEMASK_XZ) {
497 GLuint z = GET_SWZ(src0.Swizzle, Z);
498
499 /* dst.xz = swz src0.1zzz
500 */
501 emit_op(c,
502 OPCODE_SWZ,
503 dst_mask(dst, WRITEMASK_XZ),
504 inst->SaturateMode, 0, 0,
505 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
506 src_undef(),
507 src_undef());
508 }
509 if (dst.WriteMask & WRITEMASK_W) {
510 /* dst.w = mov src1.w
511 */
512 emit_op(c,
513 OPCODE_MOV,
514 dst_mask(dst, WRITEMASK_W),
515 inst->SaturateMode, 0, 0,
516 src1,
517 src_undef(),
518 src_undef());
519 }
520 }
521
522
523 static void precalc_lit( struct brw_wm_compile *c,
524 const struct prog_instruction *inst )
525 {
526 struct prog_src_register src0 = inst->SrcReg[0];
527 struct prog_dst_register dst = inst->DstReg;
528
529 if (dst.WriteMask & WRITEMASK_XW) {
530 /* dst.xw = swz src0.1111
531 */
532 emit_op(c,
533 OPCODE_SWZ,
534 dst_mask(dst, WRITEMASK_XW),
535 0, 0, 0,
536 src_swizzle1(src0, SWIZZLE_ONE),
537 src_undef(),
538 src_undef());
539 }
540
541
542 if (dst.WriteMask & WRITEMASK_YZ) {
543 emit_op(c,
544 OPCODE_LIT,
545 dst_mask(dst, WRITEMASK_YZ),
546 inst->SaturateMode, 0, 0,
547 src0,
548 src_undef(),
549 src_undef());
550 }
551 }
552
553 static void precalc_tex( struct brw_wm_compile *c,
554 const struct prog_instruction *inst )
555 {
556 struct prog_src_register coord;
557 struct prog_dst_register tmpcoord;
558
559 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
560 struct prog_instruction *out;
561 struct prog_dst_register tmp0 = get_temp(c);
562 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
563 struct prog_dst_register tmp1 = get_temp(c);
564 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
565 struct prog_src_register src0 = inst->SrcReg[0];
566
567 tmpcoord = get_temp(c);
568 coord = src_reg_from_dst(tmpcoord);
569
570 out = emit_op(c, OPCODE_MOV,
571 tmpcoord,
572 0, 0, 0,
573 src0,
574 src_undef(),
575 src_undef());
576 out->SrcReg[0].NegateBase = 0;
577 out->SrcReg[0].Abs = 1;
578
579 emit_op(c, OPCODE_MAX,
580 tmp0,
581 0, 0, 0,
582 src_swizzle1(coord, X),
583 src_swizzle1(coord, Y),
584 src_undef());
585
586 emit_op(c, OPCODE_MAX,
587 tmp1,
588 0, 0, 0,
589 tmp0src,
590 src_swizzle1(coord, Z),
591 src_undef());
592
593 emit_op(c, OPCODE_RCP,
594 tmp0,
595 0, 0, 0,
596 tmp1src,
597 src_undef(),
598 src_undef());
599
600 emit_op(c, OPCODE_MUL,
601 tmpcoord,
602 0, 0, 0,
603 src0,
604 tmp0src,
605 src_undef());
606
607 release_temp(c, tmp0);
608 release_temp(c, tmp1);
609 } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
610 struct prog_src_register scale =
611 search_or_add_param5( c,
612 STATE_INTERNAL,
613 STATE_TEXRECT_SCALE,
614 inst->TexSrcUnit,
615 0,0 );
616
617 tmpcoord = get_temp(c);
618
619 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
620 */
621 emit_op(c,
622 OPCODE_MUL,
623 tmpcoord,
624 0, 0, 0,
625 inst->SrcReg[0],
626 scale,
627 src_undef());
628
629 coord = src_reg_from_dst(tmpcoord);
630 }
631 else {
632 coord = inst->SrcReg[0];
633 }
634
635 /* Need to emit YUV texture conversions by hand. Probably need to
636 * do this here - the alternative is in brw_wm_emit.c, but the
637 * conversion requires allocating a temporary variable which we
638 * don't have the facility to do that late in the compilation.
639 */
640 if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) {
641 emit_op(c,
642 OPCODE_TEX,
643 inst->DstReg,
644 inst->SaturateMode,
645 inst->TexSrcUnit,
646 inst->TexSrcTarget,
647 coord,
648 src_undef(),
649 src_undef());
650 }
651 else {
652 /*
653 CONST C0 = { -.5, -.0625, -.5, 1.164 }
654 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
655 UYV = TEX ...
656 UYV.xyz = ADD UYV, C0
657 UYV.y = MUL UYV.y, C0.w
658 RGB.xyz = MAD UYV.xxz, C1, UYV.y
659 RGB.y = MAD UYV.z, C1.w, RGB.y
660 */
661 struct prog_dst_register dst = inst->DstReg;
662 struct prog_src_register src0 = inst->SrcReg[0];
663 struct prog_dst_register tmp = get_temp(c);
664 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
665 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
666 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
667
668 /* tmp = TEX ...
669 */
670 emit_op(c,
671 OPCODE_TEX,
672 tmp,
673 inst->SaturateMode,
674 inst->TexSrcUnit,
675 inst->TexSrcTarget,
676 src0,
677 src_undef(),
678 src_undef());
679
680 /* tmp.xyz = ADD TMP, C0
681 */
682 emit_op(c,
683 OPCODE_ADD,
684 dst_mask(tmp, WRITEMASK_XYZ),
685 0, 0, 0,
686 tmpsrc,
687 C0,
688 src_undef());
689
690 /* YUV.y = MUL YUV.y, C0.w
691 */
692 emit_op(c,
693 OPCODE_MUL,
694 dst_mask(tmp, WRITEMASK_Y),
695 0, 0, 0,
696 tmpsrc,
697 src_swizzle1(C0, W),
698 src_undef());
699
700 /* RGB.xyz = MAD YUV.xxz, C1, YUV.y
701 */
702 emit_op(c,
703 OPCODE_MAD,
704 dst_mask(dst, WRITEMASK_XYZ),
705 0, 0, 0,
706 src_swizzle(tmpsrc, X,X,Z,Z),
707 C1,
708 src_swizzle1(tmpsrc, Y));
709
710 /* RGB.y = MAD YUV.z, C1.w, RGB.y
711 */
712 emit_op(c,
713 OPCODE_MAD,
714 dst_mask(dst, WRITEMASK_Y),
715 0, 0, 0,
716 src_swizzle1(tmpsrc, Z),
717 src_swizzle1(C1, W),
718 src_swizzle1(src_reg_from_dst(dst), Y));
719
720 release_temp(c, tmp);
721 }
722
723 if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV)
724 release_temp(c, tmpcoord);
725 }
726
727
728 static GLboolean projtex( struct brw_wm_compile *c,
729 const struct prog_instruction *inst )
730 {
731 struct prog_src_register src = inst->SrcReg[0];
732
733 /* Only try to detect the simplest cases. Could detect (later)
734 * cases where we are trying to emit code like RCP {1.0}, MUL x,
735 * {1.0}, and so on.
736 *
737 * More complex cases than this typically only arise from
738 * user-provided fragment programs anyway:
739 */
740 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
741 return 0; /* ut2004 gun rendering !?! */
742 else if (src.File == PROGRAM_INPUT &&
743 GET_SWZ(src.Swizzle, W) == W &&
744 (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
745 return 0;
746 else
747 return 1;
748 }
749
750
751 static void precalc_txp( struct brw_wm_compile *c,
752 const struct prog_instruction *inst )
753 {
754 struct prog_src_register src0 = inst->SrcReg[0];
755
756 if (projtex(c, inst)) {
757 struct prog_dst_register tmp = get_temp(c);
758 struct prog_instruction tmp_inst;
759
760 /* tmp0.w = RCP inst.arg[0][3]
761 */
762 emit_op(c,
763 OPCODE_RCP,
764 dst_mask(tmp, WRITEMASK_W),
765 0, 0, 0,
766 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
767 src_undef(),
768 src_undef());
769
770 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
771 */
772 emit_op(c,
773 OPCODE_MUL,
774 dst_mask(tmp, WRITEMASK_XYZ),
775 0, 0, 0,
776 src0,
777 src_swizzle1(src_reg_from_dst(tmp), W),
778 src_undef());
779
780 /* dst = precalc(TEX tmp0)
781 */
782 tmp_inst = *inst;
783 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
784 precalc_tex(c, &tmp_inst);
785
786 release_temp(c, tmp);
787 }
788 else
789 {
790 /* dst = precalc(TEX src0)
791 */
792 precalc_tex(c, inst);
793 }
794 }
795
796
797
798
799
800 /***********************************************************************
801 * Add instructions to perform fog blending
802 */
803
804 static void fog_blend( struct brw_wm_compile *c,
805 struct prog_src_register fog_factor )
806 {
807 struct prog_dst_register outcolor = dst_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
808 struct prog_src_register fogcolor = search_or_add_param5( c, STATE_FOG_COLOR, 0,0,0,0 );
809
810 /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */
811
812 emit_op(c,
813 OPCODE_LRP,
814 dst_mask(outcolor, WRITEMASK_XYZ),
815 0, 0, 0,
816 fog_factor,
817 src_reg_from_dst(outcolor),
818 fogcolor);
819 }
820
821
822
823 /* This one is simple - just take the interpolated fog coordinate and
824 * use it as the fog blend factor.
825 */
826 static void fog_interpolated( struct brw_wm_compile *c )
827 {
828 struct prog_src_register fogc = src_reg(PROGRAM_INPUT, FRAG_ATTRIB_FOGC);
829
830 if (!(c->fp_interp_emitted & (1<<FRAG_ATTRIB_FOGC)))
831 emit_interp(c, FRAG_ATTRIB_FOGC);
832
833 fog_blend( c, src_swizzle1(fogc, GET_SWZ(fogc.Swizzle,X)));
834 }
835
836 static void emit_fog( struct brw_wm_compile *c )
837 {
838 if (!c->fp->program.FogOption)
839 return;
840
841 if (1)
842 fog_interpolated( c );
843 else {
844 /* TODO: per-pixel fog */
845 assert(0);
846 }
847 }
848
849 static void emit_fb_write( struct brw_wm_compile *c )
850 {
851 struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
852 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
853 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
854
855 emit_op(c,
856 WM_FB_WRITE,
857 dst_mask(dst_undef(),0),
858 0, 0, 0,
859 outcolor,
860 payload_r0_depth,
861 outdepth);
862 }
863
864
865
866
867 /***********************************************************************
868 * Emit INTERP instructions ahead of first use of each attrib.
869 */
870
871 static void validate_src_regs( struct brw_wm_compile *c,
872 const struct prog_instruction *inst )
873 {
874 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
875 GLuint i;
876
877 for (i = 0; i < nr_args; i++) {
878 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
879 GLuint idx = inst->SrcReg[i].Index;
880 if (!(c->fp_interp_emitted & (1<<idx))) {
881 emit_interp(c, idx);
882 }
883 }
884 }
885 }
886
887
888
889 static void print_insns( const struct prog_instruction *insn,
890 GLuint nr )
891 {
892 GLuint i;
893 for (i = 0; i < nr; i++, insn++) {
894 _mesa_printf("%3d: ", i);
895 if (insn->Opcode < MAX_OPCODE)
896 _mesa_print_instruction(insn);
897 else if (insn->Opcode < MAX_WM_OPCODE) {
898 GLuint idx = insn->Opcode - MAX_OPCODE;
899
900 _mesa_print_alu_instruction(insn,
901 wm_opcode_strings[idx],
902 3);
903 }
904 else
905 _mesa_printf("UNKNOWN\n");
906
907 }
908 }
909
910 void brw_wm_pass_fp( struct brw_wm_compile *c )
911 {
912 struct brw_fragment_program *fp = c->fp;
913 GLuint insn;
914
915 if (INTEL_DEBUG & DEBUG_WM) {
916 _mesa_printf("\n\n\npre-fp:\n");
917 _mesa_print_program(&fp->program.Base);
918 _mesa_printf("\n");
919 }
920
921 c->pixel_xy = src_undef();
922 c->delta_xy = src_undef();
923 c->pixel_w = src_undef();
924 c->nr_fp_insns = 0;
925
926 /* Emit preamble instructions:
927 */
928
929
930 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
931 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
932 struct prog_instruction *out;
933
934 /* Check for INPUT values, emit INTERP instructions where
935 * necessary:
936 */
937 validate_src_regs(c, inst);
938
939
940 switch (inst->Opcode) {
941 case OPCODE_SWZ:
942 out = emit_insn(c, inst);
943 out->Opcode = OPCODE_MOV;
944 break;
945
946 case OPCODE_ABS:
947 out = emit_insn(c, inst);
948 out->Opcode = OPCODE_MOV;
949 out->SrcReg[0].NegateBase = 0;
950 out->SrcReg[0].Abs = 1;
951 break;
952
953 case OPCODE_SUB:
954 out = emit_insn(c, inst);
955 out->Opcode = OPCODE_ADD;
956 out->SrcReg[1].NegateBase ^= 0xf;
957 break;
958
959 case OPCODE_SCS:
960 out = emit_insn(c, inst);
961 /* This should probably be done in the parser.
962 */
963 out->DstReg.WriteMask &= WRITEMASK_XY;
964 break;
965
966 case OPCODE_DST:
967 precalc_dst(c, inst);
968 break;
969
970 case OPCODE_LIT:
971 precalc_lit(c, inst);
972 break;
973
974 case OPCODE_TXP:
975 precalc_txp(c, inst);
976 break;
977
978 case OPCODE_XPD:
979 out = emit_insn(c, inst);
980 /* This should probably be done in the parser.
981 */
982 out->DstReg.WriteMask &= WRITEMASK_XYZ;
983 break;
984
985 case OPCODE_KIL:
986 out = emit_insn(c, inst);
987 /* This should probably be done in the parser.
988 */
989 out->DstReg.WriteMask = 0;
990 break;
991 case OPCODE_DDX:
992 emit_ddx(c, inst);
993 break;
994 case OPCODE_DDY:
995 emit_ddy(c, inst);
996 break;
997 case OPCODE_END:
998 emit_fog(c);
999 emit_fb_write(c);
1000 break;
1001 case OPCODE_PRINT:
1002 break;
1003
1004 default:
1005 emit_insn(c, inst);
1006 break;
1007 }
1008 }
1009
1010 if (INTEL_DEBUG & DEBUG_WM) {
1011 _mesa_printf("\n\n\npass_fp:\n");
1012 print_insns( c->prog_instructions, c->nr_fp_insns );
1013 _mesa_printf("\n");
1014 }
1015 }
1016