2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "pipe/p_shader_tokens.h"
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
38 #include "tgsi/tgsi_parse.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_util.h"
45 #include "brw_debug.h"
48 /***********************************************************************
/* Build a brw_fp_src naming register `idx` of register file `file`.
 * The visible part of the body gives the new register the swizzle
 * BRW_SWIZZLE_XYZW.
 * NOTE(review): the other field assignments and the return statement are
 * not visible in this view of the file -- confirm against the full source.
 */
52 static struct brw_fp_src
src_reg(GLuint file
, GLuint idx
)
54 struct brw_fp_src reg
;
57 reg
.swizzle
= BRW_SWIZZLE_XYZW
;
64 static struct brw_fp_src
src_reg_from_dst(struct brw_fp_dst dst
)
66 return src_reg(dst
.file
, dst
.index
);
69 static struct brw_fp_src
src_undef( void )
71 return src_reg(TGSI_FILE_NULL
, 0);
74 static GLboolean
src_is_undef(struct brw_fp_src src
)
76 return src
.file
== TGSI_FILE_NULL
;
79 static struct brw_fp_src
src_swizzle( struct brw_fp_src reg
, int x
, int y
, int z
, int w
)
81 unsigned swz
= reg
.swizzle
;
83 reg
.swizzle
= ( BRW_GET_SWZ(swz
, x
) << 0 |
84 BRW_GET_SWZ(swz
, y
) << 2 |
85 BRW_GET_SWZ(swz
, z
) << 4 |
86 BRW_GET_SWZ(swz
, w
) << 6 );
91 static struct brw_fp_src
src_scalar( struct brw_fp_src reg
, int x
)
93 return src_swizzle(reg
, x
, x
, x
, x
);
96 static struct brw_fp_src
src_abs( struct brw_fp_src src
)
103 static struct brw_fp_src
src_negate( struct brw_fp_src src
)
/* Try to express the values in `v` as a swizzle over an existing set of
 * immediate values.
 *
 * Visible behaviour: for each i below nr, the entries 0..*nr2-1 are scanned
 * until a match is flagged; a matching position j is recorded in *swizzle at
 * bits (i * 2), otherwise *nr2 is recorded at those bits.
 * NOTE(review): the comparison itself, the handling of a full set, the
 * update of *nr2 and the return value are not visible in this view --
 * presumably an unmatched value is appended as entry *nr2; confirm.
 */
111 static int match_or_expand_immediate( const float *v
,
121 for (i
= 0; i
< nr
; i
++) {
122 boolean found
= FALSE
;
124 for (j
= 0; j
< *nr2
&& !found
; j
++) {
126 *swizzle
|= j
<< (i
* 2);
136 *swizzle
|= *nr2
<< (i
* 2);
146 /* Internally generated immediates: overkill...
/* Return a source operand that reads the `nr` float values `v` from the
 * TGSI_FILE_IMMEDIATE file.
 *
 * Existing immediates c->immediate[0 .. c->nr_immediates) are offered to
 * match_or_expand_immediate() first.  If none is accepted and the table
 * still has room (c->nr_immediates < Elements(c->immediate)), a new slot is
 * claimed by post-incrementing c->nr_immediates and tried the same way.
 * Channels nr..3 of the resulting swizzle then repeat channel 0's selector,
 * and the operand is built with src_swizzle() over immediate `i`.
 * NOTE(review): the remaining arguments of both match_or_expand_immediate()
 * calls, the branches taken on their results and the table-full path are
 * not visible in this view.
 */
148 static struct brw_fp_src
src_imm( struct brw_wm_compile
*c
,
155 /* Could do a first pass where we examine all existing immediates
159 for (i
= 0; i
< c
->nr_immediates
; i
++) {
160 if (match_or_expand_immediate( v
,
168 if (c
->nr_immediates
< Elements(c
->immediate
)) {
169 i
= c
->nr_immediates
++;
170 if (match_or_expand_immediate( v
,
182 /* Make sure that all referenced elements are from this immediate.
183 * Has the effect of making size-one immediates into scalars.
185 for (j
= nr
; j
< 4; j
++)
186 swizzle
|= (swizzle
& 0x3) << (j
* 2);
188 return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE
, i
),
189 BRW_GET_SWZ(swizzle
, X
),
190 BRW_GET_SWZ(swizzle
, Y
),
191 BRW_GET_SWZ(swizzle
, Z
),
192 BRW_GET_SWZ(swizzle
, W
) );
/* Single-value convenience wrapper: passes the address of `f` and a count
 * of 1 to src_imm() and returns its result.
 * NOTE(review): the declaration of `f` is not visible in this view.
 */
197 static struct brw_fp_src
src_imm1f( struct brw_wm_compile
*c
,
200 return src_imm(c
, &f
, 1);
/* Four-value convenience wrapper: gathers x, y, z, w into a local GLfloat
 * array and passes it with a count of 4 to src_imm().
 * NOTE(review): the x/y/z/w parameter declarations are not visible in this
 * view.
 */
203 static struct brw_fp_src
src_imm4f( struct brw_wm_compile
*c
,
209 GLfloat f
[4] = {x
,y
,z
,w
};
210 return src_imm(c
, f
, 4);
215 /***********************************************************************
/* Build a brw_fp_dst naming register `idx` of register file `file`.
 * The visible part of the body gives it the writemask BRW_WRITEMASK_XYZW.
 * NOTE(review): the other field assignments and the return statement are
 * not visible in this view of the file -- confirm against the full source.
 */
219 static struct brw_fp_dst
dst_reg(GLuint file
, GLuint idx
)
221 struct brw_fp_dst reg
;
224 reg
.writemask
= BRW_WRITEMASK_XYZW
;
230 static struct brw_fp_dst
dst_mask( struct brw_fp_dst reg
, int mask
)
232 reg
.writemask
&= mask
;
236 static struct brw_fp_dst
dst_undef( void )
238 return dst_reg(TGSI_FILE_NULL
, 0);
241 static boolean
dst_is_undef( struct brw_fp_dst dst
)
243 return dst
.file
== TGSI_FILE_NULL
;
246 static struct brw_fp_dst
dst_saturate( struct brw_fp_dst reg
, boolean flag
)
252 static struct brw_fp_dst
get_temp( struct brw_wm_compile
*c
)
254 int bit
= ffs( ~c
->fp_temp
);
257 debug_printf("%s: out of temporaries\n", __FILE__
);
260 c
->fp_temp
|= 1<<(bit
-1);
261 return dst_reg(TGSI_FILE_TEMPORARY
, c
->fp_first_internal_temp
+(bit
-1));
265 static void release_temp( struct brw_wm_compile
*c
, struct brw_fp_dst temp
)
267 c
->fp_temp
&= ~(1 << (temp
.index
- c
->fp_first_internal_temp
));
271 /***********************************************************************
275 static struct brw_fp_instruction
*get_fp_inst(struct brw_wm_compile
*c
)
277 return &c
->fp_instructions
[c
->nr_fp_insns
++];
/* Append one instruction to the fragment-program instruction list.
 *
 * A fresh slot is taken with get_fp_inst().  When either tex_unit or target
 * is non-zero, an assert checks that `op` is one of the opcodes allowed to
 * carry them (TXP, TXB, TEX and at least one more that is not visible
 * here).  tex_unit and target are then stored in the instruction.
 * NOTE(review): the op/tex_unit/target parameter declarations, the stores
 * of opcode, destination and sources, and the return are not visible in
 * this view -- presumably the filled-in instruction is returned; confirm.
 */
280 static struct brw_fp_instruction
* emit_tex_op(struct brw_wm_compile
*c
,
282 struct brw_fp_dst dest
,
285 struct brw_fp_src src0
,
286 struct brw_fp_src src1
,
287 struct brw_fp_src src2
)
289 struct brw_fp_instruction
*inst
= get_fp_inst(c
);
291 if (tex_unit
|| target
)
292 assert(op
== TGSI_OPCODE_TXP
||
293 op
== TGSI_OPCODE_TXB
||
294 op
== TGSI_OPCODE_TEX
||
299 inst
->tex_unit
= tex_unit
;
300 inst
->target
= target
;
/* Emit a three-source, non-texture instruction: forwards to emit_tex_op()
 * with 0 for both the tex_unit and target arguments and discards the
 * returned instruction pointer.
 * NOTE(review): the declaration of `op` is not visible in this view.
 */
309 static INLINE
void emit_op3(struct brw_wm_compile
*c
,
311 struct brw_fp_dst dest
,
312 struct brw_fp_src src0
,
313 struct brw_fp_src src1
,
314 struct brw_fp_src src2
)
316 emit_tex_op(c
, op
, dest
, 0, 0, src0
, src1
, src2
);
/* Emit a two-source, non-texture instruction: forwards to emit_tex_op()
 * with 0 for tex_unit and target and src_undef() as the third source.
 * NOTE(review): the declaration of `op` is not visible in this view.
 */
320 static INLINE
void emit_op2(struct brw_wm_compile
*c
,
322 struct brw_fp_dst dest
,
323 struct brw_fp_src src0
,
324 struct brw_fp_src src1
)
326 emit_tex_op(c
, op
, dest
, 0, 0, src0
, src1
, src_undef());
/* Emit a one-source, non-texture instruction: forwards to emit_tex_op()
 * with 0 for tex_unit and target and src_undef() for the second and third
 * sources.
 * NOTE(review): the declaration of `op` is not visible in this view.
 */
329 static INLINE
void emit_op1(struct brw_wm_compile
*c
,
331 struct brw_fp_dst dest
,
332 struct brw_fp_src src0
)
334 emit_tex_op(c
, op
, dest
, 0, 0, src0
, src_undef(), src_undef());
/* Emit a source-less, non-texture instruction: forwards to emit_tex_op()
 * with 0 for tex_unit and target and src_undef() for all three sources.
 * NOTE(review): the declaration of `op` is not visible in this view.
 */
337 static INLINE
void emit_op0(struct brw_wm_compile
*c
,
339 struct brw_fp_dst dest
)
341 emit_tex_op(c
, op
, dest
, 0, 0, src_undef(), src_undef(), src_undef());
346 /* Many opcodes produce the same value across all the result channels.
347 * We'd rather not have to support that splatting in the opcode implementations,
348 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
349 * anyway. We can easily get both by emitting the opcode to one channel, and
350 * then MOVing it to the others, which brw_wm_pass*.c already understands.
352 static void emit_scalar_insn(struct brw_wm_compile
*c
,
354 struct brw_fp_dst dst
,
355 struct brw_fp_src src0
,
356 struct brw_fp_src src1
,
357 struct brw_fp_src src2
)
359 unsigned first_chan
= ffs(dst
.writemask
) - 1;
360 unsigned first_mask
= 1 << first_chan
;
362 if (dst
.writemask
== 0)
366 dst_mask(dst
, first_mask
),
369 if (dst
.writemask
!= first_mask
) {
370 emit_op1(c
, TGSI_OPCODE_MOV
,
371 dst_mask(dst
, ~first_mask
),
372 src_scalar(src_reg_from_dst(dst
), first_chan
));
377 /***********************************************************************
378 * Special instructions for interpolation and other tasks
/* Return the cached pixel-XY source, computing it on first use.
 *
 * While c->fp_pixel_xy is still undefined, a temporary is allocated with
 * get_temp(), an instruction writing its XY channels is emitted, and the
 * temporary is cached in c->fp_pixel_xy as a source register.  Later calls
 * return the cached value directly.
 * NOTE(review): the emit call and its opcode are not visible in this view;
 * the inline comment names it PIXELXY reading the payload depth register.
 */
381 static struct brw_fp_src
get_pixel_xy( struct brw_wm_compile
*c
)
383 if (src_is_undef(c
->fp_pixel_xy
)) {
384 struct brw_fp_dst pixel_xy
= get_temp(c
);
385 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
388 /* Emit the out calculations, and hold onto the results. Use
389 * two instructions as a temporary is required.
391 /* pixel_xy.xy = PIXELXY payload[0];
395 dst_mask(pixel_xy
, BRW_WRITEMASK_XY
),
398 c
->fp_pixel_xy
= src_reg_from_dst(pixel_xy
);
401 return c
->fp_pixel_xy
;
/* Return the cached delta-XY source, computing it on first use.
 *
 * While c->fp_delta_xy is still undefined, a temporary is allocated with
 * get_temp(), get_pixel_xy() is called (which may itself emit code), an
 * instruction writing the temporary's XY channels is emitted, and the
 * temporary is cached in c->fp_delta_xy as a source register.
 * NOTE(review): the emit call and its opcode are not visible in this view;
 * the inline comment names it DELTAXY over pixel_xy and the payload.
 */
404 static struct brw_fp_src
get_delta_xy( struct brw_wm_compile
*c
)
406 if (src_is_undef(c
->fp_delta_xy
)) {
407 struct brw_fp_dst delta_xy
= get_temp(c
);
408 struct brw_fp_src pixel_xy
= get_pixel_xy(c
);
409 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
411 /* deltas.xy = DELTAXY pixel_xy, payload[0]
415 dst_mask(delta_xy
, BRW_WRITEMASK_XY
),
420 c
->fp_delta_xy
= src_reg_from_dst(delta_xy
);
423 return c
->fp_delta_xy
;
/* Return the cached pixel-W source, computing it on first use.
 *
 * While c->fp_pixel_w is still undefined, a temporary is allocated with
 * get_temp(), the deltas are fetched with get_delta_xy(), an instruction
 * writing the temporary's W channel is emitted, and the temporary is cached
 * in c->fp_pixel_w as a source register.  The interpolation coefficients
 * are read from payload register 0, on the assumption (flagged XXX below)
 * that position is always the first attribute.
 * NOTE(review): the emit call and its opcode are not visible in this view.
 */
426 static struct brw_fp_src
get_pixel_w( struct brw_wm_compile
*c
)
428 if (src_is_undef(c
->fp_pixel_w
)) {
429 struct brw_fp_dst pixel_w
= get_temp(c
);
430 struct brw_fp_src deltas
= get_delta_xy(c
);
432 /* XXX: assuming position is always first -- valid?
434 struct brw_fp_src interp_wpos
= src_reg(BRW_FILE_PAYLOAD
, 0);
436 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
440 dst_mask(pixel_w
, BRW_WRITEMASK_W
),
446 c
->fp_pixel_w
= src_reg_from_dst(pixel_w
);
449 return c
->fp_pixel_w
;
453 /***********************************************************************
454 * Emit INTERP instructions ahead of first use of each attrib.
/* Emit the instructions that compute fragment input `idx` from the payload.
 *
 * The destination is TGSI_FILE_INPUT[idx], the interpolation coefficients
 * come from BRW_FILE_PAYLOAD[idx], and the deltas from get_delta_xy().
 * Handling is selected by semantic: POSITION (XY and ZW written
 * separately), COLOR (depends on c->key.flat_shade and on interp_mode),
 * FOG, FACE and PSIZE (each written as several channel groups), and a
 * remaining path that switches on interp_mode (CONSTANT / LINEAR /
 * PERSPECTIVE).
 * NOTE(review): the idx/semantic/interp_mode parameter declarations, the
 * switch head and every emit call are not visible in this view, so the
 * opcode used in each case cannot be confirmed from here.
 */
457 static void emit_interp( struct brw_wm_compile
*c
,
462 struct brw_fp_dst dst
= dst_reg(TGSI_FILE_INPUT
, idx
);
463 struct brw_fp_src interp
= src_reg(BRW_FILE_PAYLOAD
, idx
);
464 struct brw_fp_src deltas
= get_delta_xy(c
);
466 /* Need to use PINTERP on attributes which have been
467 * multiplied by 1/W in the SF program, and LINTERP on those
471 case TGSI_SEMANTIC_POSITION
:
472 /* Have to treat wpos.xy specially:
476 dst_mask(dst
, BRW_WRITEMASK_XY
),
479 /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
483 dst_mask(dst
, BRW_WRITEMASK_ZW
),
488 case TGSI_SEMANTIC_COLOR
:
489 if (c
->key
.flat_shade
) {
495 else if (interp_mode
== TGSI_INTERPOLATE_LINEAR
) {
513 case TGSI_SEMANTIC_FOG
:
514 /* Interpolate the fog coordinate */
517 dst_mask(dst
, BRW_WRITEMASK_X
),
524 dst_mask(dst
, BRW_WRITEMASK_YZ
),
529 dst_mask(dst
, BRW_WRITEMASK_W
),
533 case TGSI_SEMANTIC_FACE
:
534 /* XXX review/test this case */
537 dst_mask(dst
, BRW_WRITEMASK_X
));
541 dst_mask(dst
, BRW_WRITEMASK_YZ
),
546 dst_mask(dst
, BRW_WRITEMASK_W
),
550 case TGSI_SEMANTIC_PSIZE
:
551 /* XXX review/test this case */
554 dst_mask(dst
, BRW_WRITEMASK_XY
),
561 dst_mask(dst
, BRW_WRITEMASK_Z
),
566 dst_mask(dst
, BRW_WRITEMASK_W
),
571 switch (interp_mode
) {
572 case TGSI_INTERPOLATE_CONSTANT
:
579 case TGSI_INTERPOLATE_LINEAR
:
587 case TGSI_INTERPOLATE_PERSPECTIVE
:
601 /***********************************************************************
602 * Expand various instructions here to simpler forms.
/* Expand the DST opcode into simpler per-channel instructions, emitting
 * only what the destination writemask asks for.  Per the inline comments:
 *   Y  <- src0.y * src1.y
 *   Z  <- src0.z      (both Z and X are handled under the XZ mask test)
 *   X  <- 1.0         (written with saturation explicitly off)
 *   W  <- src1.w
 * NOTE(review): the emit calls and their opcodes are not visible in this
 * view; the list above restates the inline comments, not verified code.
 */
604 static void precalc_dst( struct brw_wm_compile
*c
,
605 struct brw_fp_dst dst
,
606 struct brw_fp_src src0
,
607 struct brw_fp_src src1
)
609 if (dst
.writemask
& BRW_WRITEMASK_Y
) {
610 /* dst.y = mul src0.y, src1.y
614 dst_mask(dst
, BRW_WRITEMASK_Y
),
619 if (dst
.writemask
& BRW_WRITEMASK_XZ
) {
620 /* dst.z = mov src0.zzzz
624 dst_mask(dst
, BRW_WRITEMASK_Z
),
625 src_scalar(src0
, Z
));
627 /* dst.x = imm1f(1.0)
631 dst_saturate(dst_mask(dst
, BRW_WRITEMASK_X
), 0),
634 if (dst
.writemask
& BRW_WRITEMASK_W
) {
635 /* dst.w = mov src1.w
639 dst_mask(dst
, BRW_WRITEMASK_W
),
/* Expand the LIT opcode.  If X or W is enabled, those channels are written
 * with saturation explicitly off (the inline comment says the value is the
 * immediate 1.0).  If Y or Z is enabled, a second instruction is emitted
 * with the destination masked to YZ.
 * NOTE(review): both emit calls, their opcodes and their source operands
 * are not visible in this view.
 */
645 static void precalc_lit( struct brw_wm_compile
*c
,
646 struct brw_fp_dst dst
,
647 struct brw_fp_src src0
)
649 if (dst
.writemask
& BRW_WRITEMASK_XW
) {
650 /* dst.xw = imm(1.0f)
654 dst_saturate(dst_mask(dst
, BRW_WRITEMASK_XW
), 0),
658 if (dst
.writemask
& BRW_WRITEMASK_YZ
) {
661 dst_mask(dst
, BRW_WRITEMASK_YZ
),
668 * Some TEX instructions require extra code, cube map coordinate
669 * normalization, or coordinate scaling for RECT textures, etc.
670 * This function emits those extra instructions and the TEX
671 * instruction itself.
/* Emit a texture lookup together with any fix-up code it needs.
 *
 * Visible stages:
 *  - Cube maps: the coordinate is normalised by its largest component
 *    (tmp = abs(src0); tmp.x = MAX of x, y, z; tmp.x = RCP tmp.x;
 *    tmp = src0 * tmp.xxxx) using a temporary.
 *  - RECT / SHADOWRECT targets take a separate branch whose body is not
 *    visible here.
 *  - If the unit's bit is set in c->key.yuvtex_mask, the sample goes to a
 *    temporary and is converted from YCbCr to RGB with the constants C0 and
 *    C1 (ADD, MUL, then two MADs; the first MAD's swizzle depends on
 *    c->key.yuvtex_swap_mask), and the temporary is released.
 *  - Otherwise an ordinary texture instruction is emitted.
 *  - Finally the outer temporary is released if it was allocated.
 * NOTE(review): the YUV branch declares its own `tmp`, which hides the
 * function-level `tmp`; the target/unit parameter declarations and several
 * emit calls are not visible in this view.
 */
673 static void precalc_tex( struct brw_wm_compile
*c
,
674 struct brw_fp_dst dst
,
677 struct brw_fp_src src0
)
679 struct brw_fp_src coord
= src_undef();
680 struct brw_fp_dst tmp
= dst_undef();
682 assert(unit
< BRW_MAX_TEX_UNIT
);
684 /* Cubemap: find longest component of coord vector and normalize
687 if (target
== TGSI_TEXTURE_CUBE
) {
688 struct brw_fp_src tmpsrc
;
691 tmpsrc
= src_reg_from_dst(tmp
);
693 /* tmp = abs(src0) */
699 /* tmp.X = MAX(tmp.X, tmp.Y) */
700 emit_op2(c
, TGSI_OPCODE_MAX
,
701 dst_mask(tmp
, BRW_WRITEMASK_X
),
702 src_scalar(tmpsrc
, X
),
703 src_scalar(tmpsrc
, Y
));
705 /* tmp.X = MAX(tmp.X, tmp.Z) */
706 emit_op2(c
, TGSI_OPCODE_MAX
,
707 dst_mask(tmp
, BRW_WRITEMASK_X
),
709 src_scalar(tmpsrc
, Z
));
711 /* tmp.X = 1 / tmp.X */
712 emit_op1(c
, TGSI_OPCODE_RCP
,
713 dst_mask(tmp
, BRW_WRITEMASK_X
),
716 /* tmp = src0 * tmp.xxxx */
717 emit_op2(c
, TGSI_OPCODE_MUL
,
720 src_scalar(tmpsrc
, X
));
724 else if (target
== TGSI_TEXTURE_RECT
||
725 target
== TGSI_TEXTURE_SHADOWRECT
) {
726 /* XXX: need a mechanism for internally generated constants.
734 /* Need to emit YUV texture conversions by hand. Probably need to
735 * do this here - the alternative is in brw_wm_emit.c, but the
736 * conversion requires allocating a temporary variable which we
737 * don't have the facility to do that late in the compilation.
739 if (c
->key
.yuvtex_mask
& (1 << unit
)) {
740 /* convert ycbcr to RGBA */
741 GLboolean swap_uv
= c
->key
.yuvtex_swap_mask
& (1<<unit
);
742 struct brw_fp_dst tmp
= get_temp(c
);
743 struct brw_fp_src tmpsrc
= src_reg_from_dst(tmp
);
744 struct brw_fp_src C0
= src_imm4f( c
, -.5, -.0625, -.5, 1.164 );
745 struct brw_fp_src C1
= src_imm4f( c
, 1.596, -0.813, 2.018, -.391 );
751 dst_saturate(tmp
, dst
.saturate
),
758 /* tmp.xyz = ADD TMP, C0
760 emit_op2(c
, TGSI_OPCODE_ADD
,
761 dst_mask(tmp
, BRW_WRITEMASK_XYZ
),
765 /* YUV.y = MUL YUV.y, C0.w
767 emit_op2(c
, TGSI_OPCODE_MUL
,
768 dst_mask(tmp
, BRW_WRITEMASK_Y
),
774 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
776 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
779 emit_op3(c
, TGSI_OPCODE_MAD
,
780 dst_mask(dst
, BRW_WRITEMASK_XYZ
),
782 src_swizzle(tmpsrc
, Z
,Z
,X
,X
) :
783 src_swizzle(tmpsrc
, X
,X
,Z
,Z
)),
785 src_scalar(tmpsrc
, Y
));
787 /* RGB.y = MAD YUV.z, C1.w, RGB.y
791 dst_mask(dst
, BRW_WRITEMASK_Y
),
792 src_scalar(tmpsrc
, Z
),
794 src_scalar(src_reg_from_dst(dst
), Y
));
796 release_temp(c
, tmp
);
799 /* ordinary RGBA tex instruction */
810 /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
811 * generating shader varients in mesa state tracker.
814 /* Release this temp if we ended up allocating it:
816 if (!dst_is_undef(tmp
))
817 release_temp(c
, tmp
);
822 * Check if the given TXP instruction really needs the divide-by-W step.
/* Decide whether a TXP on `src` really needs the divide-by-W step.
 *
 * Cube-map targets always return GL_FALSE.  A second test looks for a
 * coordinate that comes straight from TGSI_FILE_INPUT, whose W channel is
 * unswizzled, and whose input is not perspective-interpolated.
 * NOTE(review): the result returned for that second test and the final
 * fall-through return are not visible in this view; the `target`
 * parameter declaration is also missing.
 */
824 static GLboolean
projtex( struct brw_wm_compile
*c
,
826 struct brw_fp_src src
)
828 /* Only try to detect the simplest cases. Could detect (later)
829 * cases where we are trying to emit code like RCP {1.0}, MUL x,
832 * More complex cases than this typically only arise from
833 * user-provided fragment programs anyway:
835 if (target
== TGSI_TEXTURE_CUBE
)
836 return GL_FALSE
; /* ut2004 gun rendering !?! */
838 if (src
.file
== TGSI_FILE_INPUT
&&
839 BRW_GET_SWZ(src
.swizzle
, W
) == W
&&
840 c
->fp
->info
.input_interpolate
[src
.index
] != TGSI_INTERPOLATE_PERSPECTIVE
)
/* Expand a projective texture lookup (TXP).
 *
 * When projtex() reports that the divide is needed, a temporary is
 * allocated, tmp.w receives the reciprocal of src0.w, tmp.xyz receives
 * src0 scaled by tmp.wwww, the lookup is issued with the temporary as its
 * coordinate, and the temporary is released.  Otherwise the lookup is
 * passed unchanged to precalc_tex().
 * NOTE(review): the target/unit parameter declarations, the opcodes of the
 * two arithmetic emits and the call that consumes the temporary are not
 * visible in this view.
 */
850 static void precalc_txp( struct brw_wm_compile
*c
,
851 struct brw_fp_dst dst
,
854 struct brw_fp_src src0
)
856 if (projtex(c
, target
, src0
)) {
857 struct brw_fp_dst tmp
= get_temp(c
);
859 /* tmp0.w = RCP inst.arg[0][3]
863 dst_mask(tmp
, BRW_WRITEMASK_W
),
864 src_scalar(src0
, W
));
866 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
870 dst_mask(tmp
, BRW_WRITEMASK_XYZ
),
872 src_scalar(src_reg_from_dst(tmp
), W
));
880 src_reg_from_dst(tmp
));
882 release_temp(c
, tmp
);
888 precalc_tex(c
, dst
, target
, unit
, src0
);
893 /* XXX: note this returns a src_reg.
/* Look up a shader output by semantic name and semantic index.
 *
 * Scans the outputs 0 .. info->num_outputs-1 of the fragment shader's
 * tgsi_shader_info and returns TGSI_FILE_OUTPUT[i] for the first one whose
 * name and index both match.  If nothing matches, TGSI_FILE_OUTPUT[0] is
 * returned as a stand-in, so callers cannot tell "not found" apart from a
 * genuine hit on output 0.
 * NOTE(review): the semantic/index parameter declarations are not visible
 * in this view.
 */
895 static struct brw_fp_src
896 find_output_by_semantic( struct brw_wm_compile
*c
,
900 const struct tgsi_shader_info
*info
= &c
->fp
->info
;
903 for (i
= 0; i
< info
->num_outputs
; i
++)
904 if (info
->output_semantic_name
[i
] == semantic
&&
905 info
->output_semantic_index
[i
] == index
)
906 return src_reg( TGSI_FILE_OUTPUT
, i
);
908 /* If not found, return some arbitrary immediate value:
910 * XXX: this is a good idea but immediates are up generating extra
911 * curbe entries atm, as they would have in the original driver.
913 return src_reg( TGSI_FILE_OUTPUT
, 0 ); /* src_imm1f(c, 1.0); */
/* Emit the framebuffer-write instructions that end the program.
 *
 * The depth output is taken from the POSITION output's Z channel.  For each
 * colour buffer i below c->key.nr_cbufs, the matching COLOR output is
 * looked up and a WM_FB_WRITE is emitted through emit_tex_op(), whose
 * target/tex_unit fields are reused to carry the write target and the
 * end-of-thread flag; the flag is set only on the last colour buffer.
 * NOTE(review): the remaining emit_tex_op() arguments are not visible in
 * this view.  With nr_cbufs == 0 the visible loop emits nothing -- confirm
 * whether that case is handled elsewhere.
 */
917 static void emit_fb_write( struct brw_wm_compile
*c
)
919 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
920 struct brw_fp_src outdepth
= find_output_by_semantic(c
, TGSI_SEMANTIC_POSITION
, 0);
924 outdepth
= src_scalar(outdepth
, Z
);
926 for (i
= 0 ; i
< c
->key
.nr_cbufs
; i
++) {
927 struct brw_fp_src outcolor
;
929 outcolor
= find_output_by_semantic(c
, TGSI_SEMANTIC_COLOR
, i
);
931 /* Use emit_tex_op so that we can specify the inst->target
932 * field, which is abused to contain the FB write target and the
935 emit_tex_op(c
, WM_FB_WRITE
,
937 (i
== c
->key
.nr_cbufs
- 1), /* EOT */
/* Convert a TGSI destination register into the internal brw_fp_dst form.
 *
 * File, index, writemask and the indirect flag are copied from
 * dst->DstRegister; `saturate` becomes a boolean that is true only for
 * TGSI_SAT_ZERO_ONE.  Two asserts require the indirect register to be
 * address register 0.
 * NOTE(review): the `saturate` parameter declaration, the condition
 * guarding the asserts and the return statement are not visible in this
 * view.
 */
946 static struct brw_fp_dst
translate_dst( struct brw_wm_compile
*c
,
947 const struct tgsi_full_dst_register
*dst
,
950 struct brw_fp_dst out
;
952 out
.file
= dst
->DstRegister
.File
;
953 out
.index
= dst
->DstRegister
.Index
;
954 out
.writemask
= dst
->DstRegister
.WriteMask
;
955 out
.indirect
= dst
->DstRegister
.Indirect
;
956 out
.saturate
= (saturate
== TGSI_SAT_ZERO_ONE
);
959 assert(dst
->DstRegisterInd
.File
== TGSI_FILE_ADDRESS
);
960 assert(dst
->DstRegisterInd
.Index
== 0);
/* Convert a TGSI source register into the internal brw_fp_src form.
 *
 * File, index and the indirect flag are copied from src->SrcRegister, and
 * the four swizzle selectors are packed two bits each (X at bit 0, Y at 2,
 * Z at 4, W at 6).  The sign mode reported by
 * tgsi_util_get_full_src_register_sign_mode() is then handled case by case
 * (CLEAR, SET, TOGGLE, KEEP).  Two asserts require the indirect register
 * to be address register 0.
 * NOTE(review): the bodies of the four sign-mode cases, the condition
 * guarding the asserts and the return statement are not visible in this
 * view.
 */
967 static struct brw_fp_src
translate_src( struct brw_wm_compile
*c
,
968 const struct tgsi_full_src_register
*src
)
970 struct brw_fp_src out
;
972 out
.file
= src
->SrcRegister
.File
;
973 out
.index
= src
->SrcRegister
.Index
;
974 out
.indirect
= src
->SrcRegister
.Indirect
;
976 out
.swizzle
= ((src
->SrcRegister
.SwizzleX
<< 0) |
977 (src
->SrcRegister
.SwizzleY
<< 2) |
978 (src
->SrcRegister
.SwizzleZ
<< 4) |
979 (src
->SrcRegister
.SwizzleW
<< 6));
981 switch (tgsi_util_get_full_src_register_sign_mode( src
, 0 )) {
982 case TGSI_UTIL_SIGN_CLEAR
:
987 case TGSI_UTIL_SIGN_SET
:
992 case TGSI_UTIL_SIGN_TOGGLE
:
997 case TGSI_UTIL_SIGN_KEEP
:
1005 assert(src
->SrcRegisterInd
.File
== TGSI_FILE_ADDRESS
);
1006 assert(src
->SrcRegisterInd
.Index
== 0);
/* Translate one TGSI instruction into internal fragment-program
 * instructions.
 *
 * The first destination and the instruction's NumSrcRegs sources are
 * converted with translate_dst() / translate_src(), then the opcode is
 * dispatched:
 *   ABS           -> a MOV
 *   SUB           -> an ADD whose last operand is src_negate(src[1])
 *   SCS           -> SCS with the destination masked to XY
 *   DST, LIT      -> precalc_dst() / precalc_lit()
 *   TEX, TXP, TXB -> texture paths taking InstructionExtTexture.Texture
 *   XPD           -> XPD with the destination masked to XYZ
 *   KIL           -> KIL with an undefined, fully masked destination
 *   END           -> special-cased (body not visible)
 *   anything else -> emit_scalar_insn() when the program has no flow
 *                    control and brw_wm_is_scalar_result(opcode) holds,
 *                    otherwise emit_op3()
 * NOTE(review): only src[0 .. NumSrcRegs-1] are assigned, yet the generic
 * path passes src[0], src[1] and src[2] -- confirm the unused entries are
 * never read.  The texture cases pass src[0].file under the comment
 * "sampler unit", which looks suspicious; verify against the full source.
 * Many argument lines and all break statements are not visible in this
 * view.
 */
1014 static void emit_insn( struct brw_wm_compile
*c
,
1015 const struct tgsi_full_instruction
*inst
)
1017 unsigned opcode
= inst
->Instruction
.Opcode
;
1018 struct brw_fp_dst dst
;
1019 struct brw_fp_src src
[3];
1022 dst
= translate_dst( c
, &inst
->FullDstRegisters
[0],
1023 inst
->Instruction
.Saturate
);
1025 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++)
1026 src
[i
] = translate_src( c
, &inst
->FullSrcRegisters
[i
] );
1029 case TGSI_OPCODE_ABS
:
1030 emit_op1(c
, TGSI_OPCODE_MOV
,
1035 case TGSI_OPCODE_SUB
:
1036 emit_op2(c
, TGSI_OPCODE_ADD
,
1039 src_negate(src
[1]));
1042 case TGSI_OPCODE_SCS
:
1043 emit_op1(c
, TGSI_OPCODE_SCS
,
1044 dst_mask(dst
, BRW_WRITEMASK_XY
),
1048 case TGSI_OPCODE_DST
:
1049 precalc_dst(c
, dst
, src
[0], src
[1]);
1052 case TGSI_OPCODE_LIT
:
1053 precalc_lit(c
, dst
, src
[0]);
1056 case TGSI_OPCODE_TEX
:
1058 inst
->InstructionExtTexture
.Texture
,
1059 src
[0].file
, /* sampler unit */
1063 case TGSI_OPCODE_TXP
:
1065 inst
->InstructionExtTexture
.Texture
,
1066 src
[0].file
, /* sampler unit */
1070 case TGSI_OPCODE_TXB
:
1071 /* XXX: TXB not done
1074 inst
->InstructionExtTexture
.Texture
,
1075 src
[0].file
, /* sampler unit */
1079 case TGSI_OPCODE_XPD
:
1080 emit_op2(c
, TGSI_OPCODE_XPD
,
1081 dst_mask(dst
, BRW_WRITEMASK_XYZ
),
1086 case TGSI_OPCODE_KIL
:
1087 emit_op1(c
, TGSI_OPCODE_KIL
,
1088 dst_mask(dst_undef(), 0),
1092 case TGSI_OPCODE_END
:
1096 if (!c
->key
.has_flow_control
&&
1097 brw_wm_is_scalar_result(opcode
))
1098 emit_scalar_insn(c
, opcode
, dst
, src
[0], src
[1], src
[2]);
1100 emit_op3(c
, opcode
, dst
, src
[0], src
[1], src
[2]);
1106 * Initial pass for fragment program code generation.
1107 * This function is used by both the GLSL and non-GLSL paths.
1109 int brw_wm_pass_fp( struct brw_wm_compile
*c
)
1111 struct brw_fragment_shader
*fs
= c
->fp
;
1112 struct tgsi_parse_context parse
;
1113 struct tgsi_full_instruction
*inst
;
1114 struct tgsi_full_declaration
*decl
;
1119 if (BRW_DEBUG
& DEBUG_WM
) {
1120 debug_printf("pre-fp:\n");
1121 tgsi_dump(fs
->tokens
, 0);
1124 c
->fp_pixel_xy
= src_undef();
1125 c
->fp_delta_xy
= src_undef();
1126 c
->fp_pixel_w
= src_undef();
1128 c
->nr_immediates
= 0;
1131 /* Loop over all instructions doing assorted simplifications and
1134 tgsi_parse_init( &parse
, fs
->tokens
);
1135 while( !tgsi_parse_end_of_tokens( &parse
) ) {
1136 tgsi_parse_token( &parse
);
1138 switch( parse
.FullToken
.Token
.Type
) {
1139 case TGSI_TOKEN_TYPE_DECLARATION
:
1140 /* Turn input declarations into special WM_* instructions.
1142 * XXX: For non-branching shaders, consider deferring variable
1143 * initialization as late as possible to minimize register
1144 * usage. This is how the original BRW driver worked.
1146 * In a branching shader, must preamble instructions at decl
1147 * time, as instruction order in the shader does not
1148 * correspond to the order instructions are executed in the
1151 * This is where special instructions such as WM_CINTERP,
1152 * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1153 * compute shader inputs from the payload registers and pixel
1156 decl
= &parse
.FullToken
.FullDeclaration
;
1157 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1158 unsigned first
, last
, mask
;
1161 first
= decl
->DeclarationRange
.First
;
1162 last
= decl
->DeclarationRange
.Last
;
1163 mask
= decl
->Declaration
.UsageMask
;
1165 for (attrib
= first
; attrib
<= last
; attrib
++) {
1168 decl
->Semantic
.SemanticName
,
1169 decl
->Declaration
.Interpolate
);
1175 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1176 /* Unlike VS programs we can probably manage fine encoding
1177 * immediate values directly into the emitted EU
1178 * instructions, as we probably only need to reference one
1179 * float value per instruction. Just save the data for now
1180 * and use directly later.
1182 i
= c
->nr_immediates
++;
1183 imm
= &parse
.FullToken
.FullImmediate
.u
[i
].Float
;
1184 size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
1186 if (c
->nr_immediates
>= BRW_WM_MAX_CONST
)
1187 return PIPE_ERROR_OUT_OF_MEMORY
;
1189 for (i
= 0; i
< size
; i
++)
1190 c
->immediate
[c
->nr_immediates
].v
[i
] = imm
[i
];
1193 c
->immediate
[c
->nr_immediates
].v
[i
] = 0.0;
1195 c
->immediate
[c
->nr_immediates
].nr
= size
;
1199 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1200 inst
= &parse
.FullToken
.FullInstruction
;
1206 if (BRW_DEBUG
& DEBUG_WM
) {
1207 brw_wm_print_fp_program( c
, "pass_fp" );