1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "i915_context.h"
32 #include "pipe/tgsi/exec/tgsi_token.h"
33 #include "pipe/tgsi/exec/tgsi_parse.h"
35 #include "pipe/draw/draw_vertex.h"
39 * Simple pass-through fragment shader to use when we don't have
40 * a real shader (or it fails to compile for some reason).
/*
 * Hard-coded i915 pixel-shader program used as a fallback.  It is copied
 * verbatim into i915->current.program by i915_use_passthrough_shader().
 * The table is a sequence of hardware dwords: a
 * _3DSTATE_PIXEL_SHADER_PROGRAM header whose length field is (2*3)-1,
 * a declaration of the diffuse color input (REG_TYPE_T / T_DIFFUSE), and
 * one instruction whose destination is the output color (REG_TYPE_OC) and
 * whose source is that same input with an identity .xyzw swizzle.
 * NOTE(review): within this file the table is only read (memcpy source and
 * Elements()); it looks like it could be const -- confirm no other users.
 */
42 static unsigned passthrough
[] =
44 _3DSTATE_PIXEL_SHADER_PROGRAM
| ((2*3)-1),
46 /* declare input color:
49 (REG_TYPE_T
<< D0_TYPE_SHIFT
) |
50 (T_DIFFUSE
<< D0_NR_SHIFT
) |
55 /* move to output color:
58 (REG_TYPE_OC
<< A0_DEST_TYPE_SHIFT
) |
60 (REG_TYPE_T
<< A0_SRC0_TYPE_SHIFT
) |
61 (T_DIFFUSE
<< A0_SRC0_NR_SHIFT
)),
62 0x01230000, /* .xyzw */
/*
 * Taylor-series coefficients for sin(x), lowest power first:
 *    1, -1/3!, 1/5!, -1/7!
 */
static const float sin_constants[4] = {
   1.0,
   -1.0 / 6,       /* 3! */
   1.0 / 120,      /* 5! */
   -1.0 / 5040     /* 7! */
};
/*
 * Taylor-series coefficients for cos(x), lowest power first:
 *    1, -1/2!, 1/4!, -1/6!
 */
static const float cos_constants[4] = {
   1.0,
   -1.0 / 2,       /* 2! */
   1.0 / 24,       /* 4! */
   -1.0 / 720      /* 6! */
};
84 * component-wise negation of ureg
87 negate(int reg
, int x
, int y
, int z
, int w
)
89 /* Another neat thing about the UREG representation */
90 return reg
^ (((x
& 1) << UREG_CHANNEL_X_NEGATE_SHIFT
) |
91 ((y
& 1) << UREG_CHANNEL_Y_NEGATE_SHIFT
) |
92 ((z
& 1) << UREG_CHANNEL_Z_NEGATE_SHIFT
) |
93 ((w
& 1) << UREG_CHANNEL_W_NEGATE_SHIFT
));
98 i915_use_passthrough_shader(struct i915_context
*i915
)
100 fprintf(stderr
, "**** Using i915 pass-through fragment shader\n");
102 i915
->current
.program
= (uint
*) malloc(sizeof(passthrough
));
103 if (i915
->current
.program
) {
104 memcpy(i915
->current
.program
, passthrough
, sizeof(passthrough
));
105 i915
->current
.program_len
= Elements(passthrough
);
108 i915
->current
.num_constants
[PIPE_SHADER_FRAGMENT
] = 0;
109 i915
->current
.num_user_constants
[PIPE_SHADER_FRAGMENT
] = 0;
/*
 * Report a translation problem: prints msg to stderr, prefixed with
 * "i915_program_error: ".
 * NOTE(review): p is not referenced by any line visible here; presumably
 * the failure is also recorded in the compile state -- confirm.
 */
114 i915_program_error(struct i915_fp_compile
*p
, const char *msg
)
116 fprintf(stderr
, "i915_program_error: %s\n", msg
);
123 * Construct a ureg for the given source register. Will emit
124 * constants, apply swizzling and negation as needed.
/*
 * Build the ureg for one TGSI source operand.
 *
 * Register selection, by source->SrcRegister.File:
 *  - TGSI_FILE_TEMPORARY: reports "Exceeded max temporary reg" when the
 *    index is >= I915_MAX_TEMPORARY, otherwise uses UREG(REG_TYPE_R, index).
 *  - TGSI_FILE_INPUT: the slot index is asserted to be below
 *    vertex_info->num_attribs and mapped through slot_to_attrib[]; the
 *    resulting attribute selects a REG_TYPE_T declaration emitted with
 *    i915_emit_decl():
 *       POS       -> p->wpos_tex (asserted != -1), all channels
 *       COLOR0    -> T_DIFFUSE, all channels
 *       COLOR1    -> T_SPECULAR, XYZ only, W swizzled to ONE
 *       FOG       -> T_FOG_W, W only, broadcast with swizzle W,W,W,W
 *       TEX0..7   -> T_TEX0 + (index - TGSI_ATTRIB_TEX0)
 *    anything else reports "Bad source->Index".
 *  - TGSI_FILE_CONSTANT: UREG(REG_TYPE_CONST, index).
 *  - any other file reports "Bad source->File".
 *
 * Afterwards the SwizzleX/Y/Z/W fields of SrcRegister are applied
 * (presumably via swizzle(); the call line is not visible here) and the
 * per-component NegateX/Y/Z/W flags from SrcRegisterExtSwz are passed to
 * negate().  SrcRegister.Negate is also consulted; how it is combined with
 * the per-component flags is not visible here -- TODO confirm.
 * SrcRegisterExtMod.Absolute and SrcRegisterExtMod.Negate are asserted to
 * be unset.
 */
127 src_vector(struct i915_fp_compile
*p
,
128 const struct tgsi_full_src_register
*source
)
130 uint index
= source
->SrcRegister
.Index
;
/* Pick the hardware register according to the TGSI register file. */
133 switch (source
->SrcRegister
.File
) {
134 case TGSI_FILE_TEMPORARY
:
135 if (source
->SrcRegister
.Index
>= I915_MAX_TEMPORARY
) {
136 i915_program_error(p
, "Exceeded max temporary reg");
139 src
= UREG(REG_TYPE_R
, index
);
141 case TGSI_FILE_INPUT
:
142 /* XXX: Packing COL1, FOGC into a single attribute works for
143 * texenv programs, but will fail for real fragment programs
144 * that use these attributes and expect them to be a full 4
145 * components wide. Could use a texcoord to pass these
146 * attributes if necessary, but that won't work in the general
149 * We also use a texture coordinate to pass wpos when possible.
152 /* use vertex format info to map a slot number to a VF attrib */
153 assert(index
< p
->vertex_info
->num_attribs
);
154 index
= p
->vertex_info
->slot_to_attrib
[index
];
157 case TGSI_ATTRIB_POS
:
158 assert(p
->wpos_tex
!= -1);
159 src
= i915_emit_decl(p
, REG_TYPE_T
, p
->wpos_tex
, D0_CHANNEL_ALL
);
161 case TGSI_ATTRIB_COLOR0
:
162 src
= i915_emit_decl(p
, REG_TYPE_T
, T_DIFFUSE
, D0_CHANNEL_ALL
);
/* Secondary color only provides XYZ; W reads as the constant one. */
164 case TGSI_ATTRIB_COLOR1
:
165 src
= i915_emit_decl(p
, REG_TYPE_T
, T_SPECULAR
, D0_CHANNEL_XYZ
);
166 src
= swizzle(src
, X
, Y
, Z
, ONE
);
/* Fog is declared in the W channel of T_FOG_W and broadcast to all channels. */
168 case TGSI_ATTRIB_FOG
:
169 src
= i915_emit_decl(p
, REG_TYPE_T
, T_FOG_W
, D0_CHANNEL_W
);
170 src
= swizzle(src
, W
, W
, W
, W
);
172 case TGSI_ATTRIB_TEX0
:
173 case TGSI_ATTRIB_TEX1
:
174 case TGSI_ATTRIB_TEX2
:
175 case TGSI_ATTRIB_TEX3
:
176 case TGSI_ATTRIB_TEX4
:
177 case TGSI_ATTRIB_TEX5
:
178 case TGSI_ATTRIB_TEX6
:
179 case TGSI_ATTRIB_TEX7
:
180 src
= i915_emit_decl(p
, REG_TYPE_T
,
181 T_TEX0
+ (index
- TGSI_ATTRIB_TEX0
),
185 i915_program_error(p
, "Bad source->Index");
190 case TGSI_FILE_CONSTANT
:
191 src
= UREG(REG_TYPE_CONST
, index
);
195 i915_program_error(p
, "Bad source->File");
200 source
->SrcRegister
.SwizzleX
,
201 source
->SrcRegister
.SwizzleY
,
202 source
->SrcRegister
.SwizzleZ
,
203 source
->SrcRegister
.SwizzleW
);
205 /* There's both negate-all-components and per-component negation.
206 * Try to handle both here.
209 int nx
= source
->SrcRegisterExtSwz
.NegateX
;
210 int ny
= source
->SrcRegisterExtSwz
.NegateY
;
211 int nz
= source
->SrcRegisterExtSwz
.NegateZ
;
212 int nw
= source
->SrcRegisterExtSwz
.NegateW
;
213 if (source
->SrcRegister
.Negate
) {
219 src
= negate(src
, nx
, ny
, nz
, nw
);
222 /* no abs() or post-abs negation */
223 assert(!source
->SrcRegisterExtMod
.Absolute
);
224 assert(!source
->SrcRegisterExtMod
.Negate
);
231 * Construct a ureg for a destination register.
234 get_result_vector(struct i915_fp_compile
*p
,
235 const struct tgsi_full_dst_register
*dest
)
237 switch (dest
->DstRegister
.File
) {
238 case TGSI_FILE_OUTPUT
:
239 switch (dest
->DstRegister
.Index
) {
240 case 0: /**TGSI_ATTRIB_POS:**/
241 return UREG(REG_TYPE_OD
, 0);
242 case 1: /**TGSI_ATTRIB_COLOR0:**/
243 return UREG(REG_TYPE_OC
, 0);
245 i915_program_error(p
, "Bad inst->DstReg.Index");
248 case TGSI_FILE_TEMPORARY
:
249 return UREG(REG_TYPE_R
, dest
->DstRegister
.Index
);
251 i915_program_error(p
, "Bad inst->DstReg.File");
258 * Compute flags for saturation and writemask.
261 get_result_flags(const struct tgsi_full_instruction
*inst
)
264 = inst
->FullDstRegisters
[0].DstRegister
.WriteMask
;
267 if (inst
->Instruction
.Saturate
== TGSI_SAT_ZERO_ONE
)
268 flags
|= A0_DEST_SATURATE
;
270 if (writeMask
& TGSI_WRITEMASK_X
)
271 flags
|= A0_DEST_CHANNEL_X
;
272 if (writeMask
& TGSI_WRITEMASK_Y
)
273 flags
|= A0_DEST_CHANNEL_Y
;
274 if (writeMask
& TGSI_WRITEMASK_Z
)
275 flags
|= A0_DEST_CHANNEL_Z
;
276 if (writeMask
& TGSI_WRITEMASK_W
)
277 flags
|= A0_DEST_CHANNEL_W
;
284 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
287 translate_tex_src_target(struct i915_fp_compile
*p
, uint tex
)
290 case TGSI_TEXTURE_1D
:
291 return D0_SAMPLE_TYPE_2D
;
292 case TGSI_TEXTURE_2D
:
293 return D0_SAMPLE_TYPE_2D
;
294 case TGSI_TEXTURE_RECT
:
295 return D0_SAMPLE_TYPE_2D
;
296 case TGSI_TEXTURE_3D
:
297 return D0_SAMPLE_TYPE_VOLUME
;
298 case TGSI_TEXTURE_CUBE
:
299 return D0_SAMPLE_TYPE_CUBE
;
301 i915_program_error(p
, "TexSrc type");
308 * Generate texel lookup instruction.
311 emit_tex(struct i915_fp_compile
*p
,
312 const struct tgsi_full_instruction
*inst
,
315 uint texture
= inst
->InstructionExtTexture
.Texture
;
316 uint unit
= inst
->FullSrcRegisters
[1].SrcRegister
.Index
;
317 uint tex
= translate_tex_src_target( p
, texture
);
318 uint sampler
= i915_emit_decl(p
, REG_TYPE_S
, unit
, tex
);
319 uint coord
= src_vector( p
, &inst
->FullSrcRegisters
[0]);
322 get_result_vector( p
, &inst
->FullDstRegisters
[0] ),
323 get_result_flags( inst
),
331 * Generate a simple arithmetic instruction
332 * \param opcode the i915 opcode
333 * \param numArgs the number of input/src arguments
336 emit_simple_arith(struct i915_fp_compile
*p
,
337 const struct tgsi_full_instruction
*inst
,
338 uint opcode
, uint numArgs
)
340 uint arg1
, arg2
, arg3
;
342 assert(numArgs
<= 3);
344 arg1
= (numArgs
< 1) ? 0 : src_vector( p
, &inst
->FullSrcRegisters
[0] );
345 arg2
= (numArgs
< 2) ? 0 : src_vector( p
, &inst
->FullSrcRegisters
[1] );
346 arg3
= (numArgs
< 3) ? 0 : src_vector( p
, &inst
->FullSrcRegisters
[2] );
350 get_result_vector( p
, &inst
->FullDstRegisters
[0]),
351 get_result_flags( inst
), 0,
359 * Translate TGSI instruction to i915 instruction.
363 * SIN, COS -- could use another taylor step?
364 * LIT -- results seem a little different to sw mesa
365 * LOG -- different to mesa on negative numbers, but this is conformant.
/*
 * Translate one TGSI instruction into i915 instructions, dispatching on
 * inst->Instruction.Opcode.
 *
 * Opcodes forwarded to emit_simple_arith() (i915 opcode, source count):
 *    ADD (A0_ADD, 2), DP3 (A0_DP3, 2), DP4 (A0_DP4, 2), FLR (A0_FLR, 1),
 *    FRC (A0_FRC, 1), MAD (A0_MAD, 3), MAX (A0_MAX, 2), MOV (A0_MOV, 1),
 *    MUL (A0_MUL, 2), SGE (A0_SGE, 2), SLT (A0_SLT, 2).
 * Opcodes forwarded to emit_tex():
 *    TEX (T0_TEXLD), TXB (T0_TEXLDB), TXP (T0_TEXLDP).
 * KIL issues i915_emit_texld() with T0_TEXKILL into a scratch temp.
 * CMP emits A0_CMP with src2 passed before src1.
 * LIT expands to A0_MAX, A0_LOG, A0_MUL, A0_EXP and a final A0_CMP.
 * LRP expands to two A0_MAD instructions through a scratch temp.
 * COS and SIN first scale src0 by 1/(2*pi), apply A0_MOD and rescale by
 * 2*pi, then build powers of x in a scratch temp and combine them with
 * cos_constants / sin_constants.  SCS builds the same powers directly from
 * src0 and, depending on the destination write mask, writes the
 * sin_constants result to channel Y and the cos_constants result to
 * channel X.
 * NOTE(review): the in-body SCS comment labels the sine result as scs.x,
 * which does not match the channels written by the code -- confirm.
 * ABS, DPH, DST, EX2, LG2, MIN, POW, RCP, RSQ, SUB and XPD are expanded
 * inline from swizzled / negated operands; several of their opcode lines
 * are not visible in this listing.
 * Any other opcode reports "bad opcode".
 *
 * i915_release_utemps(p) is called at the end to release the scratch
 * temps obtained with i915_get_utemp().
 */
368 i915_translate_instruction(struct i915_fp_compile
*p
,
369 const struct tgsi_full_instruction
*inst
)
372 uint src0
, src1
, src2
, flags
;
375 switch (inst
->Instruction
.Opcode
) {
/* ABS: combines src0 with its full negation (presumably a max; the opcode line is not visible here). */
376 case TGSI_OPCODE_ABS
:
377 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
380 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
381 get_result_flags(inst
), 0,
382 src0
, negate(src0
, 1, 1, 1, 1), 0);
385 case TGSI_OPCODE_ADD
:
386 emit_simple_arith(p
, inst
, A0_ADD
, 2);
/* CMP: the last two operands are handed over in swapped order (src2, then src1). */
389 case TGSI_OPCODE_CMP
:
390 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
391 src1
= src_vector(p
, &inst
->FullSrcRegisters
[1]);
392 src2
= src_vector(p
, &inst
->FullSrcRegisters
[2]);
393 i915_emit_arith(p
, A0_CMP
,
394 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
395 get_result_flags(inst
),
396 0, src0
, src2
, src1
); /* NOTE: order of src2, src1 */
/* COS: range handling through a scratch temp, then a polynomial against cos_constants. */
399 case TGSI_OPCODE_COS
:
400 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
401 tmp
= i915_get_utemp(p
);
405 tmp
, A0_DEST_CHANNEL_X
, 0,
406 src0
, i915_emit_const1f(p
, 1.0 / (M_PI
* 2)), 0);
408 i915_emit_arith(p
, A0_MOD
, tmp
, A0_DEST_CHANNEL_X
, 0, tmp
, 0, 0);
410 /* By choosing different taylor constants, could get rid of this mul:
414 tmp
, A0_DEST_CHANNEL_X
, 0,
415 tmp
, i915_emit_const1f(p
, (M_PI
* 2)), 0);
418 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
419 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
420 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
421 * result = DP4 t0, cos_constants
425 tmp
, A0_DEST_CHANNEL_XY
, 0,
426 swizzle(tmp
, X
, X
, ONE
, ONE
),
427 swizzle(tmp
, X
, ONE
, ONE
, ONE
), 0);
431 tmp
, A0_DEST_CHANNEL_XYZ
, 0,
432 swizzle(tmp
, X
, Y
, X
, ONE
),
433 swizzle(tmp
, X
, X
, ONE
, ONE
), 0);
437 tmp
, A0_DEST_CHANNEL_XYZ
, 0,
438 swizzle(tmp
, X
, X
, Z
, ONE
),
439 swizzle(tmp
, Z
, ONE
, ONE
, ONE
), 0);
443 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
444 get_result_flags(inst
), 0,
445 swizzle(tmp
, ONE
, Z
, Y
, X
),
446 i915_emit_const4fv(p
, cos_constants
), 0);
449 case TGSI_OPCODE_DP3
:
450 emit_simple_arith(p
, inst
, A0_DP3
, 2);
453 case TGSI_OPCODE_DP4
:
454 emit_simple_arith(p
, inst
, A0_DP4
, 2);
457 case TGSI_OPCODE_DPH
:
458 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
459 src1
= src_vector(p
, &inst
->FullSrcRegisters
[1]);
463 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
464 get_result_flags(inst
), 0,
465 swizzle(src0
, X
, Y
, Z
, ONE
), src1
, 0);
468 case TGSI_OPCODE_DST
:
469 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
470 src1
= src_vector(p
, &inst
->FullSrcRegisters
[1]);
472 /* result[0] = 1 * 1;
473 * result[1] = a[1] * b[1];
474 * result[2] = a[2] * 1;
475 * result[3] = 1 * b[3];
479 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
480 get_result_flags(inst
), 0,
481 swizzle(src0
, ONE
, Y
, Z
, ONE
),
482 swizzle(src1
, ONE
, Y
, ONE
, W
), 0);
485 case TGSI_OPCODE_EX2
:
486 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
490 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
491 get_result_flags(inst
), 0,
492 swizzle(src0
, X
, X
, X
, X
), 0, 0);
495 case TGSI_OPCODE_FLR
:
496 emit_simple_arith(p
, inst
, A0_FLR
, 1);
499 case TGSI_OPCODE_FRC
:
500 emit_simple_arith(p
, inst
, A0_FRC
, 1);
503 case TGSI_OPCODE_KIL
:
504 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
505 tmp
= i915_get_utemp(p
);
507 i915_emit_texld(p
, tmp
, A0_DEST_CHANNEL_ALL
, /* use a dummy dest reg */
508 0, src0
, T0_TEXKILL
);
/* LG2: operates on src0.x replicated to all four channels. */
511 case TGSI_OPCODE_LG2
:
512 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
516 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
517 get_result_flags(inst
), 0,
518 swizzle(src0
, X
, X
, X
, X
), 0, 0);
/* LIT: expanded into MAX, LOG, MUL, EXP and a final CMP through a scratch temp. */
521 case TGSI_OPCODE_LIT
:
522 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
523 tmp
= i915_get_utemp(p
);
525 /* tmp = max( a.xyzw, a.00zw )
526 * XXX: Clamp tmp.w to -128..128
528 * tmp.y = tmp.w * tmp.y
530 * result = cmp (a.11-x1, a.1x01, a.1xy1 )
532 i915_emit_arith(p
, A0_MAX
, tmp
, A0_DEST_CHANNEL_ALL
, 0,
533 src0
, swizzle(src0
, ZERO
, ZERO
, Z
, W
), 0);
535 i915_emit_arith(p
, A0_LOG
, tmp
, A0_DEST_CHANNEL_Y
, 0,
536 swizzle(tmp
, Y
, Y
, Y
, Y
), 0, 0);
538 i915_emit_arith(p
, A0_MUL
, tmp
, A0_DEST_CHANNEL_Y
, 0,
539 swizzle(tmp
, ZERO
, Y
, ZERO
, ZERO
),
540 swizzle(tmp
, ZERO
, W
, ZERO
, ZERO
), 0);
542 i915_emit_arith(p
, A0_EXP
, tmp
, A0_DEST_CHANNEL_Y
, 0,
543 swizzle(tmp
, Y
, Y
, Y
, Y
), 0, 0);
545 i915_emit_arith(p
, A0_CMP
,
546 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
547 get_result_flags(inst
), 0,
548 negate(swizzle(tmp
, ONE
, ONE
, X
, ONE
), 0, 0, 1, 0),
549 swizzle(tmp
, ONE
, X
, ZERO
, ONE
),
550 swizzle(tmp
, ONE
, X
, Y
, ONE
));
554 case TGSI_OPCODE_LRP
:
555 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
556 src1
= src_vector(p
, &inst
->FullSrcRegisters
[1]);
557 src2
= src_vector(p
, &inst
->FullSrcRegisters
[2]);
558 flags
= get_result_flags(inst
);
559 tmp
= i915_get_utemp(p
);
566 * result = (-c)*a + tmp
568 i915_emit_arith(p
, A0_MAD
, tmp
,
569 flags
& A0_DEST_CHANNEL_ALL
, 0, src1
, src0
, src2
);
571 i915_emit_arith(p
, A0_MAD
,
572 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
573 flags
, 0, negate(src2
, 1, 1, 1, 1), src0
, tmp
);
576 case TGSI_OPCODE_MAD
:
577 emit_simple_arith(p
, inst
, A0_MAD
, 3);
580 case TGSI_OPCODE_MAX
:
581 emit_simple_arith(p
, inst
, A0_MAX
, 2);
584 case TGSI_OPCODE_MIN
:
585 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
586 src1
= src_vector(p
, &inst
->FullSrcRegisters
[1]);
587 tmp
= i915_get_utemp(p
);
588 flags
= get_result_flags(inst
);
592 tmp
, flags
& A0_DEST_CHANNEL_ALL
, 0,
593 negate(src0
, 1, 1, 1, 1),
594 negate(src1
, 1, 1, 1, 1), 0);
598 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
599 flags
, 0, negate(tmp
, 1, 1, 1, 1), 0, 0);
602 case TGSI_OPCODE_MOV
:
603 /* aka TGSI_OPCODE_SWZ */
604 emit_simple_arith(p
, inst
, A0_MOV
, 1);
607 case TGSI_OPCODE_MUL
:
608 emit_simple_arith(p
, inst
, A0_MUL
, 2);
/* POW: works on src0.x and src1 through a scratch temp; only the MUL opcode line is visible here. */
611 case TGSI_OPCODE_POW
:
612 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
613 src1
= src_vector(p
, &inst
->FullSrcRegisters
[1]);
614 tmp
= i915_get_utemp(p
);
615 flags
= get_result_flags(inst
);
617 /* XXX: masking on intermediate values, here and elsewhere.
621 tmp
, A0_DEST_CHANNEL_X
, 0,
622 swizzle(src0
, X
, X
, X
, X
), 0, 0);
624 i915_emit_arith(p
, A0_MUL
, tmp
, A0_DEST_CHANNEL_X
, 0, tmp
, src1
, 0);
628 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
629 flags
, 0, swizzle(tmp
, X
, X
, X
, X
), 0, 0);
632 case TGSI_OPCODE_RCP
:
633 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
637 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
638 get_result_flags(inst
), 0,
639 swizzle(src0
, X
, X
, X
, X
), 0, 0);
642 case TGSI_OPCODE_RSQ
:
643 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
647 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
648 get_result_flags(inst
), 0,
649 swizzle(src0
, X
, X
, X
, X
), 0, 0);
652 case TGSI_OPCODE_SCS
:
653 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
654 tmp
= i915_get_utemp(p
);
657 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
658 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
659 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
660 * scs.x = DP4 t1, sin_constants
661 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
662 * scs.y = DP4 t1, cos_constants
666 tmp
, A0_DEST_CHANNEL_XY
, 0,
667 swizzle(src0
, X
, X
, ONE
, ONE
),
668 swizzle(src0
, X
, ONE
, ONE
, ONE
), 0);
672 tmp
, A0_DEST_CHANNEL_ALL
, 0,
673 swizzle(tmp
, X
, Y
, X
, Y
),
674 swizzle(tmp
, X
, X
, ONE
, ONE
), 0);
676 writemask
= inst
->FullDstRegisters
[0].DstRegister
.WriteMask
;
678 if (writemask
& TGSI_WRITEMASK_Y
) {
681 if (writemask
& TGSI_WRITEMASK_X
)
682 tmp1
= i915_get_utemp(p
);
688 tmp1
, A0_DEST_CHANNEL_ALL
, 0,
689 swizzle(tmp
, X
, Y
, Y
, W
),
690 swizzle(tmp
, X
, Z
, ONE
, ONE
), 0);
694 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
695 A0_DEST_CHANNEL_Y
, 0,
696 swizzle(tmp1
, W
, Z
, Y
, X
),
697 i915_emit_const4fv(p
, sin_constants
), 0);
700 if (writemask
& TGSI_WRITEMASK_X
) {
703 tmp
, A0_DEST_CHANNEL_XYZ
, 0,
704 swizzle(tmp
, X
, X
, Z
, ONE
),
705 swizzle(tmp
, Z
, ONE
, ONE
, ONE
), 0);
709 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
710 A0_DEST_CHANNEL_X
, 0,
711 swizzle(tmp
, ONE
, Z
, Y
, X
),
712 i915_emit_const4fv(p
, cos_constants
), 0);
716 case TGSI_OPCODE_SGE
:
717 emit_simple_arith(p
, inst
, A0_SGE
, 2);
720 case TGSI_OPCODE_SIN
:
721 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
722 tmp
= i915_get_utemp(p
);
726 tmp
, A0_DEST_CHANNEL_X
, 0,
727 src0
, i915_emit_const1f(p
, 1.0 / (M_PI
* 2)), 0);
729 i915_emit_arith(p
, A0_MOD
, tmp
, A0_DEST_CHANNEL_X
, 0, tmp
, 0, 0);
731 /* By choosing different taylor constants, could get rid of this mul:
735 tmp
, A0_DEST_CHANNEL_X
, 0,
736 tmp
, i915_emit_const1f(p
, (M_PI
* 2)), 0);
739 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
740 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
741 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
742 * result = DP4 t1.wzyx, sin_constants
746 tmp
, A0_DEST_CHANNEL_XY
, 0,
747 swizzle(tmp
, X
, X
, ONE
, ONE
),
748 swizzle(tmp
, X
, ONE
, ONE
, ONE
), 0);
752 tmp
, A0_DEST_CHANNEL_ALL
, 0,
753 swizzle(tmp
, X
, Y
, X
, Y
),
754 swizzle(tmp
, X
, X
, ONE
, ONE
), 0);
758 tmp
, A0_DEST_CHANNEL_ALL
, 0,
759 swizzle(tmp
, X
, Y
, Y
, W
),
760 swizzle(tmp
, X
, Z
, ONE
, ONE
), 0);
764 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
765 get_result_flags(inst
), 0,
766 swizzle(tmp
, W
, Z
, Y
, X
),
767 i915_emit_const4fv(p
, sin_constants
), 0);
770 case TGSI_OPCODE_SLT
:
771 emit_simple_arith(p
, inst
, A0_SLT
, 2);
774 case TGSI_OPCODE_SUB
:
775 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
776 src1
= src_vector(p
, &inst
->FullSrcRegisters
[1]);
780 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
781 get_result_flags(inst
), 0,
782 src0
, negate(src1
, 1, 1, 1, 1), 0);
785 case TGSI_OPCODE_TEX
:
786 emit_tex(p
, inst
, T0_TEXLD
);
789 case TGSI_OPCODE_TXB
:
790 emit_tex(p
, inst
, T0_TEXLDB
);
793 case TGSI_OPCODE_TXP
:
794 emit_tex(p
, inst
, T0_TEXLDP
);
797 case TGSI_OPCODE_XPD
:
799 * result.x = src0.y * src1.z - src0.z * src1.y;
800 * result.y = src0.z * src1.x - src0.x * src1.z;
801 * result.z = src0.x * src1.y - src0.y * src1.x;
804 src0
= src_vector(p
, &inst
->FullSrcRegisters
[0]);
805 src1
= src_vector(p
, &inst
->FullSrcRegisters
[1]);
806 tmp
= i915_get_utemp(p
);
810 tmp
, A0_DEST_CHANNEL_ALL
, 0,
811 swizzle(src0
, Z
, X
, Y
, ONE
),
812 swizzle(src1
, Y
, Z
, X
, ONE
), 0);
816 get_result_vector(p
, &inst
->FullDstRegisters
[0]),
817 get_result_flags(inst
), 0,
818 swizzle(src0
, Y
, Z
, X
, ONE
),
819 swizzle(src1
, Z
, X
, Y
, ONE
),
820 negate(tmp
, 1, 1, 1, 0));
824 i915_program_error(p
, "bad opcode");
828 i915_release_utemps(p
);
833 * Translate TGSI fragment shader into i915 hardware instructions.
834 * \param p the translation state
835 * \param tokens the TGSI token array
/*
 * Walk the TGSI token stream and translate it.
 *
 * A tgsi_parse_context is initialised on tokens, tokens are parsed one at
 * a time until tgsi_parse_end_of_tokens() reports the end, and the parse
 * context is freed afterwards.  Instruction tokens are handed to
 * i915_translate_instruction().  What is done for declaration and
 * immediate tokens is not visible in this listing.
 */
838 i915_translate_instructions(struct i915_fp_compile
*p
,
839 const struct tgsi_token
*tokens
)
841 struct tgsi_parse_context parse
;
843 tgsi_parse_init( &parse
, tokens
);
845 while( !tgsi_parse_end_of_tokens( &parse
) ) {
847 tgsi_parse_token( &parse
);
/* Dispatch on the kind of token that was just parsed. */
849 switch( parse
.FullToken
.Token
.Type
) {
850 case TGSI_TOKEN_TYPE_DECLARATION
:
854 case TGSI_TOKEN_TYPE_IMMEDIATE
:
859 case TGSI_TOKEN_TYPE_INSTRUCTION
:
860 i915_translate_instruction(p
, &parse
.FullToken
.FullInstruction
);
869 tgsi_parse_free (&parse
);
/*
 * Allocate and initialise a compilation context.
 *
 * The context is allocated with CALLOC_STRUCT and pointed at i915->fs
 * (shader) and i915->current.vertex_info.  Its constant buffer is the
 * current fragment-shader constant array and num_constants starts at the
 * number of user constants.  nr_tex_indirect starts at 1, constant_flags
 * is cleared, the declaration write pointer starts at declarations[], and
 * the first declaration word is set to _3DSTATE_PIXEL_SHADER_PROGRAM.
 * The fs parameter is not used by any line visible here.
 * NOTE(review): no NULL check of the CALLOC_STRUCT result is visible
 * before p is dereferenced -- confirm how allocation failure is handled.
 */
873 static struct i915_fp_compile
*
874 i915_init_compile(struct i915_context
*i915
,
875 struct pipe_shader_state
*fs
)
877 struct i915_fp_compile
*p
= CALLOC_STRUCT(i915_fp_compile
);
879 p
->shader
= &i915
->fs
;
881 p
->vertex_info
= &i915
->current
.vertex_info
;
883 /* new constants found during translation get appended after the
884 * user-provided constants.
886 p
->constants
= i915
->current
.constants
[PIPE_SHADER_FRAGMENT
];
887 p
->num_constants
= i915
->current
.num_user_constants
[PIPE_SHADER_FRAGMENT
];
889 p
->nr_tex_indirect
= 1; /* correct? */
894 memset(p
->constant_flags
, 0, sizeof(p
->constant_flags
));
897 p
->decl
= p
->declarations
;
/* Initial bit masks for the temp / utemp bookkeeping flags. */
900 p
->temp_flag
= 0xffff000;
901 p
->utemp_flag
= ~0x7;
905 /* initialize the first program word */
906 *(p
->decl
++) = _3DSTATE_PIXEL_SHADER_PROGRAM
;
912 /* Copy compile results to the fragment program struct and destroy the
913 * compilation context.
/*
 * Finish a compilation and publish the result in i915->current.
 *
 * program_size and decl_size are the number of words written to
 * p->program and p->declarations.  Errors are reported when the
 * indirect-texture, TEX, ALU or DECL instruction counts exceed their
 * I915_MAX_* limits.  Any previously stored program is freed and
 * program_len reset to 0.
 *
 * Two outcomes are visible:
 *  - the NumNative* counters are zeroed and the pass-through shader is
 *    installed (presumably when an error was reported; the condition line
 *    is not visible here -- confirm);
 *  - the NumNative* counters are filled from the instruction counts,
 *    (program_size + decl_size - 2) is OR-ed into declarations[0] as the
 *    program length, a buffer of program_size + decl_size words is
 *    allocated and, if that succeeds, filled with two memcpy calls (the
 *    second one landing decl_size words in), and the fragment constant
 *    count is updated from p->num_constants.
 */
916 i915_fini_compile(struct i915_context
*i915
, struct i915_fp_compile
*p
)
918 uint program_size
= p
->csr
- p
->program
;
919 uint decl_size
= p
->decl
- p
->declarations
;
/* Check the instruction counts against the I915_MAX_* limits. */
921 if (p
->nr_tex_indirect
> I915_MAX_TEX_INDIRECT
)
922 i915_program_error(p
, "Exceeded max nr indirect texture lookups");
924 if (p
->nr_tex_insn
> I915_MAX_TEX_INSN
)
925 i915_program_error(p
, "Exceeded max TEX instructions");
927 if (p
->nr_alu_insn
> I915_MAX_ALU_INSN
)
928 i915_program_error(p
, "Exceeded max ALU instructions");
930 if (p
->nr_decl_insn
> I915_MAX_DECL_INSN
)
931 i915_program_error(p
, "Exceeded max DECL instructions");
933 /* free old program, if present */
934 if (i915
->current
.program
) {
935 free(i915
->current
.program
);
936 i915
->current
.program_len
= 0;
940 p
->NumNativeInstructions
= 0;
941 p
->NumNativeAluInstructions
= 0;
942 p
->NumNativeTexInstructions
= 0;
943 p
->NumNativeTexIndirections
= 0;
945 i915_use_passthrough_shader(i915
);
948 p
->NumNativeInstructions
949 = p
->nr_alu_insn
+ p
->nr_tex_insn
+ p
->nr_decl_insn
;
950 p
->NumNativeAluInstructions
= p
->nr_alu_insn
;
951 p
->NumNativeTexInstructions
= p
->nr_tex_insn
;
952 p
->NumNativeTexIndirections
= p
->nr_tex_indirect
;
954 /* patch in the program length */
955 p
->declarations
[0] |= program_size
+ decl_size
- 2;
957 /* Copy compilation results to fragment program struct:
959 i915
->current
.program
960 = (uint
*) malloc((program_size
+ decl_size
) * sizeof(uint
));
961 if (i915
->current
.program
) {
962 i915
->current
.program_len
= program_size
+ decl_size
;
964 memcpy(i915
->current
.program
,
966 decl_size
* sizeof(uint
));
968 memcpy(i915
->current
.program
+ decl_size
,
970 program_size
* sizeof(uint
));
973 /* update number of constants */
974 i915
->current
.num_constants
[PIPE_SHADER_FRAGMENT
] = p
->num_constants
;
975 assert(i915
->current
.num_constants
[PIPE_SHADER_FRAGMENT
]
976 >= i915
->current
.num_user_constants
[PIPE_SHADER_FRAGMENT
]);
979 /* Release the compilation struct:
986 * Find an unused texture coordinate slot to use for fragment WPOS.
987 * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found).
990 i915_find_wpos_space(struct i915_fp_compile
*p
)
993 = p
->shader
->inputs_read
| (1 << TGSI_ATTRIB_POS
); /*XXX hack*/
998 if (inputs
& (1 << TGSI_ATTRIB_POS
)) {
999 for (i
= 0; i
< I915_TEX_UNITS
; i
++) {
1000 if ((inputs
& (1 << (TGSI_ATTRIB_TEX0
+ i
))) == 0) {
1006 i915_program_error(p
, "No free texcoord for wpos value");
1014 * Rather than trying to intercept and jiggle depth writes during
1015 * emit, just move the value into its correct position at the end of
1019 i915_fixup_depth_write(struct i915_fp_compile
*p
)
1021 if (p
->shader
->outputs_written
& (1 << TGSI_ATTRIB_POS
)) {
1022 uint depth
= UREG(REG_TYPE_OD
, 0);
1026 depth
, A0_DEST_CHANNEL_W
, 0,
1027 swizzle(depth
, X
, Y
, Z
, Z
), 0, 0);
1033 i915_translate_fragment_program( struct i915_context
*i915
)
1035 struct i915_fp_compile
*p
= i915_init_compile(i915
, &i915
->fs
);
1036 const struct tgsi_token
*tokens
= i915
->fs
.tokens
;
1038 i915_find_wpos_space(p
);
1040 i915_translate_instructions(p
, tokens
);
1041 i915_fixup_depth_write(p
);
1043 i915_fini_compile(i915
, p
);