i965: rewrite the code for handling shader subroutine calls
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_fp.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/glheader.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "brw_context.h"
37 #include "brw_wm.h"
38 #include "brw_util.h"
39
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
43
44
/* Temporaries handed out by get_temp() are numbered from this index. */
#define FIRST_INTERNAL_TEMP   MAX_NV_FRAGMENT_PROGRAM_TEMPS

/* Vector component indices, used as swizzle selectors and with GET_SWZ(). */
#define X   0
#define Y   1
#define Z   2
#define W   3
51
52
/* Printable names for the WM-specific opcodes.  print_insns() indexes
 * this table with (Opcode - MAX_OPCODE).
 */
static const char *wm_opcode_strings[] = {
   "PIXELXY", "DELTAXY", "PIXELW", "LINTERP",
   "PINTERP", "CINTERP", "WPOSXY", "FB_WRITE"
};
63
#if 0
/* Compiled out: name table for register files; nothing in this file
 * references it.
 */
static const char *wm_file_strings[] = { "PAYLOAD" };
#endif
69
70
71 /***********************************************************************
72 * Source regs
73 */
74
75 static struct prog_src_register src_reg(GLuint file, GLuint idx)
76 {
77 struct prog_src_register reg;
78 reg.File = file;
79 reg.Index = idx;
80 reg.Swizzle = SWIZZLE_NOOP;
81 reg.RelAddr = 0;
82 reg.NegateBase = 0;
83 reg.Abs = 0;
84 reg.NegateAbs = 0;
85 return reg;
86 }
87
88 static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
89 {
90 return src_reg(dst.File, dst.Index);
91 }
92
93 static struct prog_src_register src_undef( void )
94 {
95 return src_reg(PROGRAM_UNDEFINED, 0);
96 }
97
98 static GLboolean src_is_undef(struct prog_src_register src)
99 {
100 return src.File == PROGRAM_UNDEFINED;
101 }
102
103 static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
104 {
105 reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
106 return reg;
107 }
108
109 static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
110 {
111 return src_swizzle(reg, x, x, x, x);
112 }
113
114 static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
115 {
116 reg.Swizzle = swizzle;
117 return reg;
118 }
119
120
121 /***********************************************************************
122 * Dest regs
123 */
124
125 static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
126 {
127 struct prog_dst_register reg;
128 reg.File = file;
129 reg.Index = idx;
130 reg.WriteMask = WRITEMASK_XYZW;
131 reg.RelAddr = 0;
132 reg.CondMask = 0;
133 reg.CondSwizzle = 0;
134 reg.CondSrc = 0;
135 reg.pad = 0;
136 return reg;
137 }
138
139 static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
140 {
141 reg.WriteMask &= mask;
142 return reg;
143 }
144
145 static struct prog_dst_register dst_undef( void )
146 {
147 return dst_reg(PROGRAM_UNDEFINED, 0);
148 }
149
150
151
152 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
153 {
154 int bit = _mesa_ffs( ~c->fp_temp );
155
156 if (!bit) {
157 _mesa_printf("%s: out of temporaries\n", __FILE__);
158 exit(1);
159 }
160
161 c->fp_temp |= 1<<(bit-1);
162 return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
163 }
164
165
166 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
167 {
168 c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
169 }
170
171
172 /***********************************************************************
173 * Instructions
174 */
175
176 static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
177 {
178 return &c->prog_instructions[c->nr_fp_insns++];
179 }
180
181 static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
182 const struct prog_instruction *inst0)
183 {
184 struct prog_instruction *inst = get_fp_inst(c);
185 *inst = *inst0;
186 return inst;
187 }
188
189 static struct prog_instruction * emit_op(struct brw_wm_compile *c,
190 GLuint op,
191 struct prog_dst_register dest,
192 GLuint saturate,
193 GLuint tex_src_unit,
194 GLuint tex_src_target,
195 struct prog_src_register src0,
196 struct prog_src_register src1,
197 struct prog_src_register src2 )
198 {
199 struct prog_instruction *inst = get_fp_inst(c);
200
201 memset(inst, 0, sizeof(*inst));
202
203 inst->Opcode = op;
204 inst->DstReg = dest;
205 inst->SaturateMode = saturate;
206 inst->TexSrcUnit = tex_src_unit;
207 inst->TexSrcTarget = tex_src_target;
208 inst->SrcReg[0] = src0;
209 inst->SrcReg[1] = src1;
210 inst->SrcReg[2] = src2;
211 return inst;
212 }
213
214
215
216
217 /***********************************************************************
218 * Special instructions for interpolation and other tasks
219 */
220
221 static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
222 {
223 if (src_is_undef(c->pixel_xy)) {
224 struct prog_dst_register pixel_xy = get_temp(c);
225 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
226
227
228 /* Emit the out calculations, and hold onto the results. Use
229 * two instructions as a temporary is required.
230 */
231 /* pixel_xy.xy = PIXELXY payload[0];
232 */
233 emit_op(c,
234 WM_PIXELXY,
235 dst_mask(pixel_xy, WRITEMASK_XY),
236 0, 0, 0,
237 payload_r0_depth,
238 src_undef(),
239 src_undef());
240
241 c->pixel_xy = src_reg_from_dst(pixel_xy);
242 }
243
244 return c->pixel_xy;
245 }
246
247 static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
248 {
249 if (src_is_undef(c->delta_xy)) {
250 struct prog_dst_register delta_xy = get_temp(c);
251 struct prog_src_register pixel_xy = get_pixel_xy(c);
252 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
253
254 /* deltas.xy = DELTAXY pixel_xy, payload[0]
255 */
256 emit_op(c,
257 WM_DELTAXY,
258 dst_mask(delta_xy, WRITEMASK_XY),
259 0, 0, 0,
260 pixel_xy,
261 payload_r0_depth,
262 src_undef());
263
264 c->delta_xy = src_reg_from_dst(delta_xy);
265 }
266
267 return c->delta_xy;
268 }
269
270 static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
271 {
272 if (src_is_undef(c->pixel_w)) {
273 struct prog_dst_register pixel_w = get_temp(c);
274 struct prog_src_register deltas = get_delta_xy(c);
275 struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
276
277
278 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
279 */
280 emit_op(c,
281 WM_PIXELW,
282 dst_mask(pixel_w, WRITEMASK_W),
283 0, 0, 0,
284 interp_wpos,
285 deltas,
286 src_undef());
287
288
289 c->pixel_w = src_reg_from_dst(pixel_w);
290 }
291
292 return c->pixel_w;
293 }
294
295 static void emit_interp( struct brw_wm_compile *c,
296 GLuint idx )
297 {
298 struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
299 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
300 struct prog_src_register deltas = get_delta_xy(c);
301 struct prog_src_register arg2;
302 GLuint opcode;
303
304 /* Need to use PINTERP on attributes which have been
305 * multiplied by 1/W in the SF program, and LINTERP on those
306 * which have not:
307 */
308 switch (idx) {
309 case FRAG_ATTRIB_WPOS:
310 opcode = WM_LINTERP;
311 arg2 = src_undef();
312
313 /* Have to treat wpos.xy specially:
314 */
315 emit_op(c,
316 WM_WPOSXY,
317 dst_mask(dst, WRITEMASK_XY),
318 0, 0, 0,
319 get_pixel_xy(c),
320 src_undef(),
321 src_undef());
322
323 dst = dst_mask(dst, WRITEMASK_ZW);
324
325 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
326 */
327 emit_op(c,
328 WM_LINTERP,
329 dst,
330 0, 0, 0,
331 interp,
332 deltas,
333 arg2);
334 break;
335 case FRAG_ATTRIB_COL0:
336 case FRAG_ATTRIB_COL1:
337 if (c->key.flat_shade) {
338 emit_op(c,
339 WM_CINTERP,
340 dst,
341 0, 0, 0,
342 interp,
343 src_undef(),
344 src_undef());
345 }
346 else {
347 emit_op(c,
348 WM_LINTERP,
349 dst,
350 0, 0, 0,
351 interp,
352 deltas,
353 src_undef());
354 }
355 break;
356 default:
357 emit_op(c,
358 WM_PINTERP,
359 dst,
360 0, 0, 0,
361 interp,
362 deltas,
363 get_pixel_w(c));
364 break;
365 }
366
367 c->fp_interp_emitted |= 1<<idx;
368 }
369
370 static void emit_ddx( struct brw_wm_compile *c,
371 const struct prog_instruction *inst )
372 {
373 GLuint idx = inst->SrcReg[0].Index;
374 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
375
376 c->fp_deriv_emitted |= 1<<idx;
377 emit_op(c,
378 OPCODE_DDX,
379 inst->DstReg,
380 0, 0, 0,
381 interp,
382 get_pixel_w(c),
383 src_undef());
384 }
385
386 static void emit_ddy( struct brw_wm_compile *c,
387 const struct prog_instruction *inst )
388 {
389 GLuint idx = inst->SrcReg[0].Index;
390 struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
391
392 c->fp_deriv_emitted |= 1<<idx;
393 emit_op(c,
394 OPCODE_DDY,
395 inst->DstReg,
396 0, 0, 0,
397 interp,
398 get_pixel_w(c),
399 src_undef());
400 }
401
402 /***********************************************************************
403 * Hacks to extend the program parameter and constant lists.
404 */
405
406 /* Add the fog parameters to the parameter list of the original
407 * program, rather than creating a new list. Doesn't really do any
408 * harm and it's not as if the parameter handling isn't a big hack
409 * anyway.
410 */
411 static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c,
412 GLint s0,
413 GLint s1,
414 GLint s2,
415 GLint s3,
416 GLint s4)
417 {
418 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
419 gl_state_index tokens[STATE_LENGTH];
420 GLuint idx;
421 tokens[0] = s0;
422 tokens[1] = s1;
423 tokens[2] = s2;
424 tokens[3] = s3;
425 tokens[4] = s4;
426
427 for (idx = 0; idx < paramList->NumParameters; idx++) {
428 if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
429 memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
430 return src_reg(PROGRAM_STATE_VAR, idx);
431 }
432
433 idx = _mesa_add_state_reference( paramList, tokens );
434
435 return src_reg(PROGRAM_STATE_VAR, idx);
436 }
437
438
439 static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
440 GLfloat s0,
441 GLfloat s1,
442 GLfloat s2,
443 GLfloat s3)
444 {
445 struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
446 GLfloat values[4];
447 GLuint idx;
448 GLuint swizzle;
449
450 values[0] = s0;
451 values[1] = s1;
452 values[2] = s2;
453 values[3] = s3;
454
455 /* Have to search, otherwise multiple compilations will each grow
456 * the parameter list.
457 */
458 for (idx = 0; idx < paramList->NumParameters; idx++) {
459 if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
460 memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
461
462 /* XXX: this mimics the mesa bug which puts all constants and
463 * parameters into the "PROGRAM_STATE_VAR" category:
464 */
465 return src_reg(PROGRAM_STATE_VAR, idx);
466 }
467
468 idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
469 assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
470 return src_reg(PROGRAM_STATE_VAR, idx);
471 }
472
473
474
475 /***********************************************************************
476 * Expand various instructions here to simpler forms.
477 */
478 static void precalc_dst( struct brw_wm_compile *c,
479 const struct prog_instruction *inst )
480 {
481 struct prog_src_register src0 = inst->SrcReg[0];
482 struct prog_src_register src1 = inst->SrcReg[1];
483 struct prog_dst_register dst = inst->DstReg;
484
485 if (dst.WriteMask & WRITEMASK_Y) {
486 /* dst.y = mul src0.y, src1.y
487 */
488 emit_op(c,
489 OPCODE_MUL,
490 dst_mask(dst, WRITEMASK_Y),
491 inst->SaturateMode, 0, 0,
492 src0,
493 src1,
494 src_undef());
495 }
496
497
498 if (dst.WriteMask & WRITEMASK_XZ) {
499 struct prog_instruction *swz;
500 GLuint z = GET_SWZ(src0.Swizzle, Z);
501
502 /* dst.xz = swz src0.1zzz
503 */
504 swz = emit_op(c,
505 OPCODE_SWZ,
506 dst_mask(dst, WRITEMASK_XZ),
507 inst->SaturateMode, 0, 0,
508 src_swizzle(src0, SWIZZLE_ONE, z, z, z),
509 src_undef(),
510 src_undef());
511 /* Avoid letting negation flag of src0 affect our 1 constant. */
512 swz->SrcReg[0].NegateBase &= ~NEGATE_X;
513 }
514 if (dst.WriteMask & WRITEMASK_W) {
515 /* dst.w = mov src1.w
516 */
517 emit_op(c,
518 OPCODE_MOV,
519 dst_mask(dst, WRITEMASK_W),
520 inst->SaturateMode, 0, 0,
521 src1,
522 src_undef(),
523 src_undef());
524 }
525 }
526
527
528 static void precalc_lit( struct brw_wm_compile *c,
529 const struct prog_instruction *inst )
530 {
531 struct prog_src_register src0 = inst->SrcReg[0];
532 struct prog_dst_register dst = inst->DstReg;
533
534 if (dst.WriteMask & WRITEMASK_XW) {
535 struct prog_instruction *swz;
536
537 /* dst.xw = swz src0.1111
538 */
539 swz = emit_op(c,
540 OPCODE_SWZ,
541 dst_mask(dst, WRITEMASK_XW),
542 0, 0, 0,
543 src_swizzle1(src0, SWIZZLE_ONE),
544 src_undef(),
545 src_undef());
546 /* Avoid letting the negation flag of src0 affect our 1 constant. */
547 swz->SrcReg[0].NegateBase = 0;
548 }
549
550
551 if (dst.WriteMask & WRITEMASK_YZ) {
552 emit_op(c,
553 OPCODE_LIT,
554 dst_mask(dst, WRITEMASK_YZ),
555 inst->SaturateMode, 0, 0,
556 src0,
557 src_undef(),
558 src_undef());
559 }
560 }
561
562
563 /**
564 * Some TEX instructions require extra code, cube map coordinate
565 * normalization, or coordinate scaling for RECT textures, etc.
566 * This function emits those extra instructions and the TEX
567 * instruction itself.
568 */
569 static void precalc_tex( struct brw_wm_compile *c,
570 const struct prog_instruction *inst )
571 {
572 struct prog_src_register coord;
573 struct prog_dst_register tmpcoord;
574 const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
575
576 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
577 struct prog_instruction *out;
578 struct prog_dst_register tmp0 = get_temp(c);
579 struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
580 struct prog_dst_register tmp1 = get_temp(c);
581 struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
582 struct prog_src_register src0 = inst->SrcReg[0];
583
584 /* find longest component of coord vector and normalize it */
585 tmpcoord = get_temp(c);
586 coord = src_reg_from_dst(tmpcoord);
587
588 /* tmpcoord = src0 (i.e.: coord = src0) */
589 out = emit_op(c, OPCODE_MOV,
590 tmpcoord,
591 0, 0, 0,
592 src0,
593 src_undef(),
594 src_undef());
595 out->SrcReg[0].NegateBase = 0;
596 out->SrcReg[0].Abs = 1;
597
598 /* tmp0 = MAX(coord.X, coord.Y) */
599 emit_op(c, OPCODE_MAX,
600 tmp0,
601 0, 0, 0,
602 src_swizzle1(coord, X),
603 src_swizzle1(coord, Y),
604 src_undef());
605
606 /* tmp1 = MAX(tmp0, coord.Z) */
607 emit_op(c, OPCODE_MAX,
608 tmp1,
609 0, 0, 0,
610 tmp0src,
611 src_swizzle1(coord, Z),
612 src_undef());
613
614 /* tmp0 = 1 / tmp1 */
615 emit_op(c, OPCODE_RCP,
616 tmp0,
617 0, 0, 0,
618 tmp1src,
619 src_undef(),
620 src_undef());
621
622 /* tmpCoord = src0 * tmp0 */
623 emit_op(c, OPCODE_MUL,
624 tmpcoord,
625 0, 0, 0,
626 src0,
627 tmp0src,
628 src_undef());
629
630 release_temp(c, tmp0);
631 release_temp(c, tmp1);
632 }
633 else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
634 struct prog_src_register scale =
635 search_or_add_param5( c,
636 STATE_INTERNAL,
637 STATE_TEXRECT_SCALE,
638 unit,
639 0,0 );
640
641 tmpcoord = get_temp(c);
642
643 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
644 */
645 emit_op(c,
646 OPCODE_MUL,
647 tmpcoord,
648 0, 0, 0,
649 inst->SrcReg[0],
650 scale,
651 src_undef());
652
653 coord = src_reg_from_dst(tmpcoord);
654 }
655 else {
656 coord = inst->SrcReg[0];
657 }
658
659 /* Need to emit YUV texture conversions by hand. Probably need to
660 * do this here - the alternative is in brw_wm_emit.c, but the
661 * conversion requires allocating a temporary variable which we
662 * don't have the facility to do that late in the compilation.
663 */
664 if (c->key.yuvtex_mask & (1 << unit)) {
665 /* convert ycbcr to RGBA */
666 GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
667
668 /*
669 CONST C0 = { -.5, -.0625, -.5, 1.164 }
670 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
671 UYV = TEX ...
672 UYV.xyz = ADD UYV, C0
673 UYV.y = MUL UYV.y, C0.w
674 if (UV swaped)
675 RGB.xyz = MAD UYV.zzx, C1, UYV.y
676 else
677 RGB.xyz = MAD UYV.xxz, C1, UYV.y
678 RGB.y = MAD UYV.z, C1.w, RGB.y
679 */
680 struct prog_dst_register dst = inst->DstReg;
681 struct prog_dst_register tmp = get_temp(c);
682 struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
683 struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 );
684 struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
685
686 /* tmp = TEX ...
687 */
688 emit_op(c,
689 OPCODE_TEX,
690 tmp,
691 inst->SaturateMode,
692 unit,
693 inst->TexSrcTarget,
694 coord,
695 src_undef(),
696 src_undef());
697
698 /* tmp.xyz = ADD TMP, C0
699 */
700 emit_op(c,
701 OPCODE_ADD,
702 dst_mask(tmp, WRITEMASK_XYZ),
703 0, 0, 0,
704 tmpsrc,
705 C0,
706 src_undef());
707
708 /* YUV.y = MUL YUV.y, C0.w
709 */
710
711 emit_op(c,
712 OPCODE_MUL,
713 dst_mask(tmp, WRITEMASK_Y),
714 0, 0, 0,
715 tmpsrc,
716 src_swizzle1(C0, W),
717 src_undef());
718
719 /*
720 * if (UV swaped)
721 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
722 * else
723 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
724 */
725
726 emit_op(c,
727 OPCODE_MAD,
728 dst_mask(dst, WRITEMASK_XYZ),
729 0, 0, 0,
730 swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
731 C1,
732 src_swizzle1(tmpsrc, Y));
733
734 /* RGB.y = MAD YUV.z, C1.w, RGB.y
735 */
736 emit_op(c,
737 OPCODE_MAD,
738 dst_mask(dst, WRITEMASK_Y),
739 0, 0, 0,
740 src_swizzle1(tmpsrc, Z),
741 src_swizzle1(C1, W),
742 src_swizzle1(src_reg_from_dst(dst), Y));
743
744 release_temp(c, tmp);
745 }
746 else {
747 /* ordinary RGBA tex instruction */
748 emit_op(c,
749 OPCODE_TEX,
750 inst->DstReg,
751 inst->SaturateMode,
752 unit,
753 inst->TexSrcTarget,
754 coord,
755 src_undef(),
756 src_undef());
757 }
758
759 /* For GL_EXT_texture_swizzle: */
760 if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
761 /* swizzle the result of the TEX instruction */
762 struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
763 emit_op(c, OPCODE_SWZ,
764 inst->DstReg,
765 SATURATE_OFF, /* saturate already done above */
766 0, 0, /* tex unit, target N/A */
767 src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
768 src_undef(),
769 src_undef());
770 }
771
772 if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
773 (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
774 release_temp(c, tmpcoord);
775 }
776
777
778 static GLboolean projtex( struct brw_wm_compile *c,
779 const struct prog_instruction *inst )
780 {
781 struct prog_src_register src = inst->SrcReg[0];
782
783 /* Only try to detect the simplest cases. Could detect (later)
784 * cases where we are trying to emit code like RCP {1.0}, MUL x,
785 * {1.0}, and so on.
786 *
787 * More complex cases than this typically only arise from
788 * user-provided fragment programs anyway:
789 */
790 if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
791 return 0; /* ut2004 gun rendering !?! */
792 else if (src.File == PROGRAM_INPUT &&
793 GET_SWZ(src.Swizzle, W) == W &&
794 (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
795 return 0;
796 else
797 return 1;
798 }
799
800
801 static void precalc_txp( struct brw_wm_compile *c,
802 const struct prog_instruction *inst )
803 {
804 struct prog_src_register src0 = inst->SrcReg[0];
805
806 if (projtex(c, inst)) {
807 struct prog_dst_register tmp = get_temp(c);
808 struct prog_instruction tmp_inst;
809
810 /* tmp0.w = RCP inst.arg[0][3]
811 */
812 emit_op(c,
813 OPCODE_RCP,
814 dst_mask(tmp, WRITEMASK_W),
815 0, 0, 0,
816 src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
817 src_undef(),
818 src_undef());
819
820 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
821 */
822 emit_op(c,
823 OPCODE_MUL,
824 dst_mask(tmp, WRITEMASK_XYZ),
825 0, 0, 0,
826 src0,
827 src_swizzle1(src_reg_from_dst(tmp), W),
828 src_undef());
829
830 /* dst = precalc(TEX tmp0)
831 */
832 tmp_inst = *inst;
833 tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
834 precalc_tex(c, &tmp_inst);
835
836 release_temp(c, tmp);
837 }
838 else
839 {
840 /* dst = precalc(TEX src0)
841 */
842 precalc_tex(c, inst);
843 }
844 }
845
846
847
848 static void emit_fb_write( struct brw_wm_compile *c )
849 {
850 struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
851 struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
852 struct prog_src_register outcolor;
853 GLuint i;
854
855 struct prog_instruction *inst, *last_inst;
856 struct brw_context *brw = c->func.brw;
857
858 /* inst->Sampler is not used by backend,
859 use it for fb write target and eot */
860
861 if (brw->state.nr_draw_regions > 1) {
862 for (i = 0 ; i < brw->state.nr_draw_regions; i++) {
863 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
864 last_inst = inst = emit_op(c,
865 WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0,
866 outcolor, payload_r0_depth, outdepth);
867 inst->Sampler = (i<<1);
868 if (c->fp_fragcolor_emitted) {
869 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
870 last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
871 0, 0, 0, outcolor, payload_r0_depth, outdepth);
872 inst->Sampler = (i<<1);
873 }
874 }
875 last_inst->Sampler |= 1; //eot
876 }
877 else {
878 /* if gl_FragData[0] is written, use it, else use gl_FragColor */
879 if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
880 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
881 else
882 outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
883
884 inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
885 0, 0, 0, outcolor, payload_r0_depth, outdepth);
886 inst->Sampler = 1|(0<<1);
887 }
888 }
889
890
891
892
893 /***********************************************************************
894 * Emit INTERP instructions ahead of first use of each attrib.
895 */
896
897 static void validate_src_regs( struct brw_wm_compile *c,
898 const struct prog_instruction *inst )
899 {
900 GLuint nr_args = brw_wm_nr_args( inst->Opcode );
901 GLuint i;
902
903 for (i = 0; i < nr_args; i++) {
904 if (inst->SrcReg[i].File == PROGRAM_INPUT) {
905 GLuint idx = inst->SrcReg[i].Index;
906 if (!(c->fp_interp_emitted & (1<<idx))) {
907 emit_interp(c, idx);
908 }
909 }
910 }
911 }
912
913 static void validate_dst_regs( struct brw_wm_compile *c,
914 const struct prog_instruction *inst )
915 {
916 if (inst->DstReg.File == PROGRAM_OUTPUT) {
917 GLuint idx = inst->DstReg.Index;
918 if (idx == FRAG_RESULT_COLR)
919 c->fp_fragcolor_emitted = 1;
920 }
921 }
922
923 static void print_insns( const struct prog_instruction *insn,
924 GLuint nr )
925 {
926 GLuint i;
927 for (i = 0; i < nr; i++, insn++) {
928 _mesa_printf("%3d: ", i);
929 if (insn->Opcode < MAX_OPCODE)
930 _mesa_print_instruction(insn);
931 else if (insn->Opcode < MAX_WM_OPCODE) {
932 GLuint idx = insn->Opcode - MAX_OPCODE;
933
934 _mesa_print_alu_instruction(insn,
935 wm_opcode_strings[idx],
936 3);
937 }
938 else
939 _mesa_printf("UNKNOWN\n");
940
941 }
942 }
943
944
945 /**
946 * Initial pass for fragment program code generation.
947 * This function is used by both the GLSL and non-GLSL paths.
948 */
949 void brw_wm_pass_fp( struct brw_wm_compile *c )
950 {
951 struct brw_fragment_program *fp = c->fp;
952 GLuint insn;
953
954 if (INTEL_DEBUG & DEBUG_WM) {
955 _mesa_printf("pre-fp:\n");
956 _mesa_print_program(&fp->program.Base);
957 _mesa_printf("\n");
958 }
959
960 c->pixel_xy = src_undef();
961 c->delta_xy = src_undef();
962 c->pixel_w = src_undef();
963 c->nr_fp_insns = 0;
964
965 /* Emit preamble instructions. This is where special instructions such as
966 * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
967 * compute shader inputs from varying vars.
968 */
969 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
970 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
971 validate_src_regs(c, inst);
972 validate_dst_regs(c, inst);
973 }
974
975 /* Loop over all instructions doing assorted simplifications and
976 * transformations.
977 */
978 for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
979 const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
980 struct prog_instruction *out;
981
982 /* Check for INPUT values, emit INTERP instructions where
983 * necessary:
984 */
985
986 switch (inst->Opcode) {
987 case OPCODE_SWZ:
988 out = emit_insn(c, inst);
989 out->Opcode = OPCODE_MOV;
990 break;
991
992 case OPCODE_ABS:
993 out = emit_insn(c, inst);
994 out->Opcode = OPCODE_MOV;
995 out->SrcReg[0].NegateBase = 0;
996 out->SrcReg[0].Abs = 1;
997 break;
998
999 case OPCODE_SUB:
1000 out = emit_insn(c, inst);
1001 out->Opcode = OPCODE_ADD;
1002 out->SrcReg[1].NegateBase ^= 0xf;
1003 break;
1004
1005 case OPCODE_SCS:
1006 out = emit_insn(c, inst);
1007 /* This should probably be done in the parser.
1008 */
1009 out->DstReg.WriteMask &= WRITEMASK_XY;
1010 break;
1011
1012 case OPCODE_DST:
1013 precalc_dst(c, inst);
1014 break;
1015
1016 case OPCODE_LIT:
1017 precalc_lit(c, inst);
1018 break;
1019
1020 case OPCODE_TEX:
1021 precalc_tex(c, inst);
1022 break;
1023
1024 case OPCODE_TXP:
1025 precalc_txp(c, inst);
1026 break;
1027
1028 case OPCODE_TXB:
1029 out = emit_insn(c, inst);
1030 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
1031 break;
1032
1033 case OPCODE_XPD:
1034 out = emit_insn(c, inst);
1035 /* This should probably be done in the parser.
1036 */
1037 out->DstReg.WriteMask &= WRITEMASK_XYZ;
1038 break;
1039
1040 case OPCODE_KIL:
1041 out = emit_insn(c, inst);
1042 /* This should probably be done in the parser.
1043 */
1044 out->DstReg.WriteMask = 0;
1045 break;
1046 case OPCODE_DDX:
1047 emit_ddx(c, inst);
1048 break;
1049 case OPCODE_DDY:
1050 emit_ddy(c, inst);
1051 break;
1052 case OPCODE_END:
1053 emit_fb_write(c);
1054 break;
1055 case OPCODE_PRINT:
1056 break;
1057
1058 default:
1059 emit_insn(c, inst);
1060 break;
1061 }
1062 }
1063
1064 if (INTEL_DEBUG & DEBUG_WM) {
1065 _mesa_printf("pass_fp:\n");
1066 print_insns( c->prog_instructions, c->nr_fp_insns );
1067 _mesa_printf("\n");
1068 }
1069 }
1070