2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
36 #include "brw_context.h"
40 #include "shader/prog_parameter.h"
41 #include "shader/prog_print.h"
42 #include "shader/prog_statevars.h"
45 #define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
53 static const char *wm_opcode_strings
[] = {
65 static const char *wm_file_strings
[] = {
71 /***********************************************************************
/* Build a source-register descriptor for the given register file and
 * index.  The visible part of the body declares the result and sets its
 * swizzle to SWIZZLE_NOOP; the statements that store `file` and `idx`
 * and return the result are not visible in this excerpt.
 * NOTE(review): callers such as src_reg_from_dst() pass (File, Index)
 * pairs, so the parameters presumably initialise those fields -- confirm
 * against the full source.
 */
75 static struct prog_src_register
src_reg(GLuint file
, GLuint idx
)
77 struct prog_src_register reg
;
80 reg
.Swizzle
= SWIZZLE_NOOP
;
88 static struct prog_src_register
src_reg_from_dst(struct prog_dst_register dst
)
90 return src_reg(dst
.File
, dst
.Index
);
93 static struct prog_src_register
src_undef( void )
95 return src_reg(PROGRAM_UNDEFINED
, 0);
98 static GLboolean
src_is_undef(struct prog_src_register src
)
100 return src
.File
== PROGRAM_UNDEFINED
;
103 static struct prog_src_register
src_swizzle( struct prog_src_register reg
, int x
, int y
, int z
, int w
)
105 reg
.Swizzle
= MAKE_SWIZZLE4(x
,y
,z
,w
);
109 static struct prog_src_register
src_swizzle1( struct prog_src_register reg
, int x
)
111 return src_swizzle(reg
, x
, x
, x
, x
);
115 /***********************************************************************
/* Build a destination-register descriptor for the given register file
 * and index.  The visible part of the body declares the result and
 * enables all four channels (WRITEMASK_XYZW); the statements that store
 * `file` and `idx` and return the result are not visible in this
 * excerpt.
 * NOTE(review): presumably File/Index are initialised from the
 * parameters, mirroring how callers use the result -- confirm against
 * the full source.
 */
119 static struct prog_dst_register
dst_reg(GLuint file
, GLuint idx
)
121 struct prog_dst_register reg
;
124 reg
.WriteMask
= WRITEMASK_XYZW
;
132 static struct prog_dst_register
dst_mask( struct prog_dst_register reg
, int mask
)
134 reg
.WriteMask
&= mask
;
138 static struct prog_dst_register
dst_undef( void )
140 return dst_reg(PROGRAM_UNDEFINED
, 0);
/* Allocate an internal temporary register for the compile pass.
 *
 * c->fp_temp is used as an allocation bitmask: ffs() on its complement
 * yields the 1-based position of the lowest clear bit.  That bit is then
 * set in c->fp_temp, and the function returns a PROGRAM_TEMPORARY
 * destination register whose index is FIRST_INTERNAL_TEMP + (bit - 1).
 *
 * An "out of temporaries" diagnostic is printed on some path; the
 * condition guarding it is not visible in this excerpt.
 * NOTE(review): presumably that path is taken when ffs() returns 0
 * (no free bit) -- confirm against the full source.
 */
145 static struct prog_dst_register
get_temp( struct brw_wm_compile
*c
)
147 int bit
= ffs( ~c
->fp_temp
);
150 _mesa_printf("%s: out of temporaries\n", __FILE__
);
154 c
->fp_temp
|= 1<<(bit
-1);
155 return dst_reg(PROGRAM_TEMPORARY
, FIRST_INTERNAL_TEMP
+(bit
-1));
159 static void release_temp( struct brw_wm_compile
*c
, struct prog_dst_register temp
)
161 c
->fp_temp
&= ~1<<(temp
.Index
+ 1 - FIRST_INTERNAL_TEMP
);
165 /***********************************************************************
169 static struct prog_instruction
*get_fp_inst(struct brw_wm_compile
*c
)
171 return &c
->prog_instructions
[c
->nr_fp_insns
++];
/* Append an output instruction derived from the input instruction
 * inst0.
 *
 * The visible part of the body takes the next free slot from
 * get_fp_inst() and records a pointer back to inst0 in the new
 * instruction's Data field.  Callers assign the result to a
 * prog_instruction pointer and then patch fields such as Opcode, so the
 * new instruction is returned; the return statement, and any statement
 * between the slot allocation and the Data assignment, are not visible
 * in this excerpt.
 * NOTE(review): presumably the missing statement copies *inst0 into the
 * new slot -- confirm against the full source.
 */
174 static struct prog_instruction
*emit_insn(struct brw_wm_compile
*c
,
175 const struct prog_instruction
*inst0
)
177 struct prog_instruction
*inst
= get_fp_inst(c
);
179 inst
->Data
= (void *)inst0
;
183 static struct prog_instruction
* emit_op(struct brw_wm_compile
*c
,
185 struct prog_dst_register dest
,
188 GLuint tex_src_target
,
189 struct prog_src_register src0
,
190 struct prog_src_register src1
,
191 struct prog_src_register src2
)
193 struct prog_instruction
*inst
= get_fp_inst(c
);
195 memset(inst
, 0, sizeof(*inst
));
199 inst
->SaturateMode
= saturate
;
200 inst
->TexSrcUnit
= tex_src_unit
;
201 inst
->TexSrcTarget
= tex_src_target
;
202 inst
->SrcReg
[0] = src0
;
203 inst
->SrcReg
[1] = src1
;
204 inst
->SrcReg
[2] = src2
;
211 /***********************************************************************
212 * Special instructions for interpolation and other tasks
215 static struct prog_src_register
get_pixel_xy( struct brw_wm_compile
*c
)
217 if (src_is_undef(c
->pixel_xy
)) {
218 struct prog_dst_register pixel_xy
= get_temp(c
);
219 struct prog_src_register payload_r0_depth
= src_reg(PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
);
222 /* Emit the out calculations, and hold onto the results. Use
223 * two instructions as a temporary is required.
225 /* pixel_xy.xy = PIXELXY payload[0];
229 dst_mask(pixel_xy
, WRITEMASK_XY
),
235 c
->pixel_xy
= src_reg_from_dst(pixel_xy
);
241 static struct prog_src_register
get_delta_xy( struct brw_wm_compile
*c
)
243 if (src_is_undef(c
->delta_xy
)) {
244 struct prog_dst_register delta_xy
= get_temp(c
);
245 struct prog_src_register pixel_xy
= get_pixel_xy(c
);
246 struct prog_src_register payload_r0_depth
= src_reg(PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
);
248 /* deltas.xy = DELTAXY pixel_xy, payload[0]
252 dst_mask(delta_xy
, WRITEMASK_XY
),
258 c
->delta_xy
= src_reg_from_dst(delta_xy
);
264 static struct prog_src_register
get_pixel_w( struct brw_wm_compile
*c
)
266 if (src_is_undef(c
->pixel_w
)) {
267 struct prog_dst_register pixel_w
= get_temp(c
);
268 struct prog_src_register deltas
= get_delta_xy(c
);
269 struct prog_src_register interp_wpos
= src_reg(PROGRAM_PAYLOAD
, FRAG_ATTRIB_WPOS
);
272 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
276 dst_mask(pixel_w
, WRITEMASK_W
),
283 c
->pixel_w
= src_reg_from_dst(pixel_w
);
289 static void emit_interp( struct brw_wm_compile
*c
,
292 struct prog_dst_register dst
= dst_reg(PROGRAM_INPUT
, idx
);
293 struct prog_src_register interp
= src_reg(PROGRAM_PAYLOAD
, idx
);
294 struct prog_src_register deltas
= get_delta_xy(c
);
295 struct prog_src_register arg2
;
298 /* Need to use PINTERP on attributes which have been
299 * multiplied by 1/W in the SF program, and LINTERP on those
303 case FRAG_ATTRIB_WPOS
:
307 /* Have to treat wpos.xy specially:
311 dst_mask(dst
, WRITEMASK_XY
),
317 dst
= dst_mask(dst
, WRITEMASK_ZW
);
319 /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
329 case FRAG_ATTRIB_COL0
:
330 case FRAG_ATTRIB_COL1
:
331 if (c
->key
.flat_shade
) {
361 c
->fp_interp_emitted
|= 1<<idx
;
364 static void emit_ddx( struct brw_wm_compile
*c
,
365 const struct prog_instruction
*inst
)
367 GLuint idx
= inst
->SrcReg
[0].Index
;
368 struct prog_src_register interp
= src_reg(PROGRAM_PAYLOAD
, idx
);
370 c
->fp_deriv_emitted
|= 1<<idx
;
380 static void emit_ddy( struct brw_wm_compile
*c
,
381 const struct prog_instruction
*inst
)
383 GLuint idx
= inst
->SrcReg
[0].Index
;
384 struct prog_src_register interp
= src_reg(PROGRAM_PAYLOAD
, idx
);
386 c
->fp_deriv_emitted
|= 1<<idx
;
396 /***********************************************************************
397 * Hacks to extend the program parameter and constant lists.
400 /* Add the fog parameters to the parameter list of the original
401 * program, rather than creating a new list. Doesn't really do any
402 * harm and it's not as if the parameter handling isn't a big hack
405 static struct prog_src_register
search_or_add_param5(struct brw_wm_compile
*c
,
412 struct gl_program_parameter_list
*paramList
= c
->fp
->program
.Base
.Parameters
;
413 gl_state_index tokens
[STATE_LENGTH
];
421 for (idx
= 0; idx
< paramList
->NumParameters
; idx
++) {
422 if (paramList
->Parameters
[idx
].Type
== PROGRAM_STATE_VAR
&&
423 memcmp(paramList
->Parameters
[idx
].StateIndexes
, tokens
, sizeof(tokens
)) == 0)
424 return src_reg(PROGRAM_STATE_VAR
, idx
);
427 idx
= _mesa_add_state_reference( paramList
, tokens
);
429 /* Recalculate state dependency:
431 c
->fp
->param_state
= paramList
->StateFlags
;
433 return src_reg(PROGRAM_STATE_VAR
, idx
);
437 static struct prog_src_register
search_or_add_const4f( struct brw_wm_compile
*c
,
443 struct gl_program_parameter_list
*paramList
= c
->fp
->program
.Base
.Parameters
;
453 /* Have to search, otherwise multiple compilations will each grow
454 * the parameter list.
456 for (idx
= 0; idx
< paramList
->NumParameters
; idx
++) {
457 if (paramList
->Parameters
[idx
].Type
== PROGRAM_CONSTANT
&&
458 memcmp(paramList
->ParameterValues
[idx
], values
, sizeof(values
)) == 0)
460 /* XXX: this mimics the mesa bug which puts all constants and
461 * parameters into the "PROGRAM_STATE_VAR" category:
463 return src_reg(PROGRAM_STATE_VAR
, idx
);
466 idx
= _mesa_add_unnamed_constant( paramList
, values
, 4, &swizzle
);
467 assert(swizzle
== SWIZZLE_NOOP
); /* Need to handle swizzle in reg setup */
468 return src_reg(PROGRAM_STATE_VAR
, idx
);
473 /***********************************************************************
474 * Expand various instructions here to simpler forms.
476 static void precalc_dst( struct brw_wm_compile
*c
,
477 const struct prog_instruction
*inst
)
479 struct prog_src_register src0
= inst
->SrcReg
[0];
480 struct prog_src_register src1
= inst
->SrcReg
[1];
481 struct prog_dst_register dst
= inst
->DstReg
;
483 if (dst
.WriteMask
& WRITEMASK_Y
) {
484 /* dst.y = mul src0.y, src1.y
488 dst_mask(dst
, WRITEMASK_Y
),
489 inst
->SaturateMode
, 0, 0,
496 if (dst
.WriteMask
& WRITEMASK_XZ
) {
497 GLuint z
= GET_SWZ(src0
.Swizzle
, Z
);
499 /* dst.xz = swz src0.1zzz
503 dst_mask(dst
, WRITEMASK_XZ
),
504 inst
->SaturateMode
, 0, 0,
505 src_swizzle(src0
, SWIZZLE_ONE
, z
, z
, z
),
509 if (dst
.WriteMask
& WRITEMASK_W
) {
510 /* dst.w = mov src1.w
514 dst_mask(dst
, WRITEMASK_W
),
515 inst
->SaturateMode
, 0, 0,
523 static void precalc_lit( struct brw_wm_compile
*c
,
524 const struct prog_instruction
*inst
)
526 struct prog_src_register src0
= inst
->SrcReg
[0];
527 struct prog_dst_register dst
= inst
->DstReg
;
529 if (dst
.WriteMask
& WRITEMASK_XW
) {
530 /* dst.xw = swz src0.1111
534 dst_mask(dst
, WRITEMASK_XW
),
536 src_swizzle1(src0
, SWIZZLE_ONE
),
542 if (dst
.WriteMask
& WRITEMASK_YZ
) {
545 dst_mask(dst
, WRITEMASK_YZ
),
546 inst
->SaturateMode
, 0, 0,
553 static void precalc_tex( struct brw_wm_compile
*c
,
554 const struct prog_instruction
*inst
)
556 struct prog_src_register coord
;
557 struct prog_dst_register tmpcoord
;
559 if (inst
->TexSrcTarget
== TEXTURE_CUBE_INDEX
) {
560 struct prog_instruction
*out
;
561 struct prog_dst_register tmp0
= get_temp(c
);
562 struct prog_src_register tmp0src
= src_reg_from_dst(tmp0
);
563 struct prog_dst_register tmp1
= get_temp(c
);
564 struct prog_src_register tmp1src
= src_reg_from_dst(tmp1
);
565 struct prog_src_register src0
= inst
->SrcReg
[0];
567 tmpcoord
= get_temp(c
);
568 coord
= src_reg_from_dst(tmpcoord
);
570 out
= emit_op(c
, OPCODE_MOV
,
576 out
->SrcReg
[0].NegateBase
= 0;
577 out
->SrcReg
[0].Abs
= 1;
579 emit_op(c
, OPCODE_MAX
,
582 src_swizzle1(coord
, X
),
583 src_swizzle1(coord
, Y
),
586 emit_op(c
, OPCODE_MAX
,
590 src_swizzle1(coord
, Z
),
593 emit_op(c
, OPCODE_RCP
,
600 emit_op(c
, OPCODE_MUL
,
607 release_temp(c
, tmp0
);
608 release_temp(c
, tmp1
);
609 } else if (inst
->TexSrcTarget
== TEXTURE_RECT_INDEX
) {
610 struct prog_src_register scale
=
611 search_or_add_param5( c
,
617 tmpcoord
= get_temp(c
);
619 /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height }
629 coord
= src_reg_from_dst(tmpcoord
);
632 coord
= inst
->SrcReg
[0];
635 /* Need to emit YUV texture conversions by hand. Probably need to
636 * do this here - the alternative is in brw_wm_emit.c, but the
637 * conversion requires allocating a temporary variable which we
638 * don't have the facility to do that late in the compilation.
640 if (!(c
->key
.yuvtex_mask
& (1<<inst
->TexSrcUnit
))) {
653 CONST C0 = { -.5, -.0625, -.5, 1.164 }
654 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
656 UYV.xyz = ADD UYV, C0
657 UYV.y = MUL UYV.y, C0.w
658 RGB.xyz = MAD UYV.xxz, C1, UYV.y
659 RGB.y = MAD UYV.z, C1.w, RGB.y
661 struct prog_dst_register dst
= inst
->DstReg
;
662 struct prog_src_register src0
= inst
->SrcReg
[0];
663 struct prog_dst_register tmp
= get_temp(c
);
664 struct prog_src_register tmpsrc
= src_reg_from_dst(tmp
);
665 struct prog_src_register C0
= search_or_add_const4f( c
, -.5, -.0625, -.5, 1.164 );
666 struct prog_src_register C1
= search_or_add_const4f( c
, 1.596, -0.813, 2.018, -.391 );
680 /* tmp.xyz = ADD TMP, C0
684 dst_mask(tmp
, WRITEMASK_XYZ
),
690 /* YUV.y = MUL YUV.y, C0.w
694 dst_mask(tmp
, WRITEMASK_Y
),
700 /* RGB.xyz = MAD YUV.xxz, C1, YUV.y
704 dst_mask(dst
, WRITEMASK_XYZ
),
706 src_swizzle(tmpsrc
, X
,X
,Z
,Z
),
708 src_swizzle1(tmpsrc
, Y
));
710 /* RGB.y = MAD YUV.z, C1.w, RGB.y
714 dst_mask(dst
, WRITEMASK_Y
),
716 src_swizzle1(tmpsrc
, Z
),
718 src_swizzle1(src_reg_from_dst(dst
), Y
));
720 release_temp(c
, tmp
);
723 if (inst
->TexSrcTarget
== GL_TEXTURE_RECTANGLE_NV
)
724 release_temp(c
, tmpcoord
);
728 static GLboolean
projtex( struct brw_wm_compile
*c
,
729 const struct prog_instruction
*inst
)
731 struct prog_src_register src
= inst
->SrcReg
[0];
733 /* Only try to detect the simplest cases. Could detect (later)
734 * cases where we are trying to emit code like RCP {1.0}, MUL x,
737 * More complex cases than this typically only arise from
738 * user-provided fragment programs anyway:
740 if (inst
->TexSrcTarget
== TEXTURE_CUBE_INDEX
)
741 return 0; /* ut2004 gun rendering !?! */
742 else if (src
.File
== PROGRAM_INPUT
&&
743 GET_SWZ(src
.Swizzle
, W
) == W
&&
744 (c
->key
.projtex_mask
& (1<<(src
.Index
+ FRAG_ATTRIB_WPOS
- FRAG_ATTRIB_TEX0
))) == 0)
751 static void precalc_txp( struct brw_wm_compile
*c
,
752 const struct prog_instruction
*inst
)
754 struct prog_src_register src0
= inst
->SrcReg
[0];
756 if (projtex(c
, inst
)) {
757 struct prog_dst_register tmp
= get_temp(c
);
758 struct prog_instruction tmp_inst
;
760 /* tmp0.w = RCP inst.arg[0][3]
764 dst_mask(tmp
, WRITEMASK_W
),
766 src_swizzle1(src0
, GET_SWZ(src0
.Swizzle
, W
)),
770 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
774 dst_mask(tmp
, WRITEMASK_XYZ
),
777 src_swizzle1(src_reg_from_dst(tmp
), W
),
780 /* dst = precalc(TEX tmp0)
783 tmp_inst
.SrcReg
[0] = src_reg_from_dst(tmp
);
784 precalc_tex(c
, &tmp_inst
);
786 release_temp(c
, tmp
);
790 /* dst = precalc(TEX src0)
792 precalc_tex(c
, inst
);
800 /***********************************************************************
801 * Add instructions to perform fog blending
804 static void fog_blend( struct brw_wm_compile
*c
,
805 struct prog_src_register fog_factor
)
807 struct prog_dst_register outcolor
= dst_reg(PROGRAM_OUTPUT
, FRAG_RESULT_COLR
);
808 struct prog_src_register fogcolor
= search_or_add_param5( c
, STATE_FOG_COLOR
, 0,0,0,0 );
810 /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */
814 dst_mask(outcolor
, WRITEMASK_XYZ
),
817 src_reg_from_dst(outcolor
),
823 /* This one is simple - just take the interpolated fog coordinate and
824 * use it as the fog blend factor.
826 static void fog_interpolated( struct brw_wm_compile
*c
)
828 struct prog_src_register fogc
= src_reg(PROGRAM_INPUT
, FRAG_ATTRIB_FOGC
);
830 if (!(c
->fp_interp_emitted
& (1<<FRAG_ATTRIB_FOGC
)))
831 emit_interp(c
, FRAG_ATTRIB_FOGC
);
833 fog_blend( c
, src_swizzle1(fogc
, GET_SWZ(fogc
.Swizzle
,X
)));
836 static void emit_fog( struct brw_wm_compile
*c
)
838 if (!c
->fp
->program
.FogOption
)
842 fog_interpolated( c
);
844 /* TODO: per-pixel fog */
849 static void emit_fb_write( struct brw_wm_compile
*c
)
851 struct prog_src_register outcolor
= src_reg(PROGRAM_OUTPUT
, FRAG_RESULT_COLR
);
852 struct prog_src_register payload_r0_depth
= src_reg(PROGRAM_PAYLOAD
, PAYLOAD_DEPTH
);
853 struct prog_src_register outdepth
= src_reg(PROGRAM_OUTPUT
, FRAG_RESULT_DEPR
);
857 dst_mask(dst_undef(),0),
867 /***********************************************************************
868 * Emit INTERP instructions ahead of first use of each attrib.
871 static void validate_src_regs( struct brw_wm_compile
*c
,
872 const struct prog_instruction
*inst
)
874 GLuint nr_args
= brw_wm_nr_args( inst
->Opcode
);
877 for (i
= 0; i
< nr_args
; i
++) {
878 if (inst
->SrcReg
[i
].File
== PROGRAM_INPUT
) {
879 GLuint idx
= inst
->SrcReg
[i
].Index
;
880 if (!(c
->fp_interp_emitted
& (1<<idx
))) {
889 static void print_insns( const struct prog_instruction
*insn
,
893 for (i
= 0; i
< nr
; i
++, insn
++) {
894 _mesa_printf("%3d: ", i
);
895 if (insn
->Opcode
< MAX_OPCODE
)
896 _mesa_print_instruction(insn
);
897 else if (insn
->Opcode
< MAX_WM_OPCODE
) {
898 GLuint idx
= insn
->Opcode
- MAX_OPCODE
;
900 _mesa_print_alu_instruction(insn
,
901 wm_opcode_strings
[idx
],
905 _mesa_printf("UNKNOWN\n");
910 void brw_wm_pass_fp( struct brw_wm_compile
*c
)
912 struct brw_fragment_program
*fp
= c
->fp
;
915 if (INTEL_DEBUG
& DEBUG_WM
) {
916 _mesa_printf("\n\n\npre-fp:\n");
917 _mesa_print_program(&fp
->program
.Base
);
921 c
->pixel_xy
= src_undef();
922 c
->delta_xy
= src_undef();
923 c
->pixel_w
= src_undef();
926 /* Emit preamble instructions:
930 for (insn
= 0; insn
< fp
->program
.Base
.NumInstructions
; insn
++) {
931 const struct prog_instruction
*inst
= &fp
->program
.Base
.Instructions
[insn
];
932 struct prog_instruction
*out
;
934 /* Check for INPUT values, emit INTERP instructions where
937 validate_src_regs(c
, inst
);
940 switch (inst
->Opcode
) {
942 out
= emit_insn(c
, inst
);
943 out
->Opcode
= OPCODE_MOV
;
947 out
= emit_insn(c
, inst
);
948 out
->Opcode
= OPCODE_MOV
;
949 out
->SrcReg
[0].NegateBase
= 0;
950 out
->SrcReg
[0].Abs
= 1;
954 out
= emit_insn(c
, inst
);
955 out
->Opcode
= OPCODE_ADD
;
956 out
->SrcReg
[1].NegateBase
^= 0xf;
960 out
= emit_insn(c
, inst
);
961 /* This should probably be done in the parser.
963 out
->DstReg
.WriteMask
&= WRITEMASK_XY
;
967 precalc_dst(c
, inst
);
971 precalc_lit(c
, inst
);
975 precalc_txp(c
, inst
);
979 out
= emit_insn(c
, inst
);
980 /* This should probably be done in the parser.
982 out
->DstReg
.WriteMask
&= WRITEMASK_XYZ
;
986 out
= emit_insn(c
, inst
);
987 /* This should probably be done in the parser.
989 out
->DstReg
.WriteMask
= 0;
1010 if (INTEL_DEBUG
& DEBUG_WM
) {
1011 _mesa_printf("\n\n\npass_fp:\n");
1012 print_insns( c
->prog_instructions
, c
->nr_fp_insns
);