i915g: Fix a bug in facing.
[mesa.git] / src / gallium / drivers / i915 / i915_fpc_translate.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include <stdarg.h>
30
31 #include "i915_reg.h"
32 #include "i915_context.h"
33 #include "i915_fpc.h"
34
35 #include "pipe/p_shader_tokens.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_string.h"
39 #include "tgsi/tgsi_parse.h"
40 #include "tgsi/tgsi_dump.h"
41
42 #include "draw/draw_vertex.h"
43
44
45 /**
46 * Simple pass-through fragment shader to use when we don't have
47 * a real shader (or it fails to compile for some reason).
48 */
49 static unsigned passthrough[] =
50 {
51 _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1),
52
53 /* declare input color:
54 */
55 (D0_DCL |
56 (REG_TYPE_T << D0_TYPE_SHIFT) |
57 (T_DIFFUSE << D0_NR_SHIFT) |
58 D0_CHANNEL_ALL),
59 0,
60 0,
61
62 /* move to output color:
63 */
64 (A0_MOV |
65 (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
66 A0_DEST_CHANNEL_ALL |
67 (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) |
68 (T_DIFFUSE << A0_SRC0_NR_SHIFT)),
69 0x01230000, /* .xyzw */
70 0
71 };
72
73
/*
 * Coefficients of the odd-power terms used by the SIN/SCS expansion:
 * 1, -1/3!, 1/5!, -1/7!
 */
static const float sin_constants[4] = {
   1.0f,
   -1.0f / 6,      /* 3! */
   1.0f / 120,     /* 5! */
   -1.0f / 5040    /* 7! */
};
80
/*
 * Coefficients of the even-power terms used by the COS/SCS expansion:
 * 1, -1/2!, 1/4!, -1/6!
 */
static const float cos_constants[4] = {
   1.0f,
   -1.0f / 2,      /* 2! */
   1.0f / 24,      /* 4! */
   -1.0f / 720     /* 6! */
};
87
88
89
90 /**
91 * component-wise negation of ureg
92 */
93 static INLINE int
94 negate(int reg, int x, int y, int z, int w)
95 {
96 /* Another neat thing about the UREG representation */
97 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
98 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
99 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
100 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
101 }
102
103
104 /**
105 * In the event of a translation failure, we'll generate a simple color
106 * pass-through program.
107 */
108 static void
109 i915_use_passthrough_shader(struct i915_fragment_shader *fs)
110 {
111 fs->program = (uint *) MALLOC(sizeof(passthrough));
112 if (fs->program) {
113 memcpy(fs->program, passthrough, sizeof(passthrough));
114 fs->program_len = Elements(passthrough);
115 }
116 fs->num_constants = 0;
117 }
118
119
120 void
121 i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
122 {
123 va_list args;
124 char buffer[1024];
125
126 debug_printf("i915_program_error: ");
127 va_start( args, msg );
128 util_vsnprintf( buffer, sizeof(buffer), msg, args );
129 va_end( args );
130 debug_printf("%s", buffer);
131 debug_printf("\n");
132
133 p->error = 1;
134 }
135
136 static uint get_mapping(struct i915_fragment_shader* fs, int unit)
137 {
138 int i;
139 for (i = 0; i < I915_TEX_UNITS; i++)
140 {
141 if (fs->generic_mapping[i] == -1) {
142 fs->generic_mapping[i] = unit;
143 return i;
144 }
145 if (fs->generic_mapping[i] == unit)
146 return i;
147 }
148 debug_printf("Exceeded max generics\n");
149 return 0;
150 }
151
152 /**
153 * Construct a ureg for the given source register. Will emit
154 * constants, apply swizzling and negation as needed.
155 */
156 static uint
157 src_vector(struct i915_fp_compile *p,
158 const struct tgsi_full_src_register *source,
159 struct i915_fragment_shader* fs)
160 {
161 uint index = source->Register.Index;
162 uint src = 0, sem_name, sem_ind;
163
164 switch (source->Register.File) {
165 case TGSI_FILE_TEMPORARY:
166 if (source->Register.Index >= I915_MAX_TEMPORARY) {
167 i915_program_error(p, "Exceeded max temporary reg");
168 return 0;
169 }
170 src = UREG(REG_TYPE_R, index);
171 break;
172 case TGSI_FILE_INPUT:
173 /* XXX: Packing COL1, FOGC into a single attribute works for
174 * texenv programs, but will fail for real fragment programs
175 * that use these attributes and expect them to be a full 4
176 * components wide. Could use a texcoord to pass these
177 * attributes if necessary, but that won't work in the general
178 * case.
179 *
180 * We also use a texture coordinate to pass wpos when possible.
181 */
182
183 sem_name = p->shader->info.input_semantic_name[index];
184 sem_ind = p->shader->info.input_semantic_index[index];
185
186 switch (sem_name) {
187 case TGSI_SEMANTIC_POSITION:
188 {
189 /* for fragcoord */
190 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS);
191 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
192 break;
193 }
194 case TGSI_SEMANTIC_COLOR:
195 if (sem_ind == 0) {
196 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
197 }
198 else {
199 /* secondary color */
200 assert(sem_ind == 1);
201 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
202 src = swizzle(src, X, Y, Z, ONE);
203 }
204 break;
205 case TGSI_SEMANTIC_FOG:
206 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
207 src = swizzle(src, W, W, W, W);
208 break;
209 case TGSI_SEMANTIC_GENERIC:
210 {
211 int real_tex_unit = get_mapping(fs, sem_ind);
212 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
213 break;
214 }
215 case TGSI_SEMANTIC_FACE:
216 {
217 /* for back/front faces */
218 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE);
219 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
220 break;
221 }
222 default:
223 i915_program_error(p, "Bad source->Index");
224 return 0;
225 }
226 break;
227
228 case TGSI_FILE_IMMEDIATE:
229 assert(index < p->num_immediates);
230 index = p->immediates_map[index];
231 /* fall-through */
232 case TGSI_FILE_CONSTANT:
233 src = UREG(REG_TYPE_CONST, index);
234 break;
235
236 default:
237 i915_program_error(p, "Bad source->File");
238 return 0;
239 }
240
241 src = swizzle(src,
242 source->Register.SwizzleX,
243 source->Register.SwizzleY,
244 source->Register.SwizzleZ,
245 source->Register.SwizzleW);
246
247 /* There's both negate-all-components and per-component negation.
248 * Try to handle both here.
249 */
250 {
251 int n = source->Register.Negate;
252 src = negate(src, n, n, n, n);
253 }
254
255 /* no abs() */
256 #if 0
257 /* XXX assertions disabled to allow arbfplight.c to run */
258 /* XXX enable these assertions, or fix things */
259 assert(!source->Register.Absolute);
260 #endif
261 return src;
262 }
263
264
265 /**
266 * Construct a ureg for a destination register.
267 */
268 static uint
269 get_result_vector(struct i915_fp_compile *p,
270 const struct tgsi_full_dst_register *dest)
271 {
272 switch (dest->Register.File) {
273 case TGSI_FILE_OUTPUT:
274 {
275 uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index];
276 switch (sem_name) {
277 case TGSI_SEMANTIC_POSITION:
278 return UREG(REG_TYPE_OD, 0);
279 case TGSI_SEMANTIC_COLOR:
280 return UREG(REG_TYPE_OC, 0);
281 default:
282 i915_program_error(p, "Bad inst->DstReg.Index/semantics");
283 return 0;
284 }
285 }
286 case TGSI_FILE_TEMPORARY:
287 return UREG(REG_TYPE_R, dest->Register.Index);
288 default:
289 i915_program_error(p, "Bad inst->DstReg.File");
290 return 0;
291 }
292 }
293
294
295 /**
296 * Compute flags for saturation and writemask.
297 */
298 static uint
299 get_result_flags(const struct tgsi_full_instruction *inst)
300 {
301 const uint writeMask
302 = inst->Dst[0].Register.WriteMask;
303 uint flags = 0x0;
304
305 if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
306 flags |= A0_DEST_SATURATE;
307
308 if (writeMask & TGSI_WRITEMASK_X)
309 flags |= A0_DEST_CHANNEL_X;
310 if (writeMask & TGSI_WRITEMASK_Y)
311 flags |= A0_DEST_CHANNEL_Y;
312 if (writeMask & TGSI_WRITEMASK_Z)
313 flags |= A0_DEST_CHANNEL_Z;
314 if (writeMask & TGSI_WRITEMASK_W)
315 flags |= A0_DEST_CHANNEL_W;
316
317 return flags;
318 }
319
320
321 /**
322 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
323 */
324 static uint
325 translate_tex_src_target(struct i915_fp_compile *p, uint tex)
326 {
327 switch (tex) {
328 case TGSI_TEXTURE_SHADOW1D:
329 /* fall-through */
330 case TGSI_TEXTURE_1D:
331 return D0_SAMPLE_TYPE_2D;
332
333 case TGSI_TEXTURE_SHADOW2D:
334 /* fall-through */
335 case TGSI_TEXTURE_2D:
336 return D0_SAMPLE_TYPE_2D;
337
338 case TGSI_TEXTURE_SHADOWRECT:
339 /* fall-through */
340 case TGSI_TEXTURE_RECT:
341 return D0_SAMPLE_TYPE_2D;
342
343 case TGSI_TEXTURE_3D:
344 return D0_SAMPLE_TYPE_VOLUME;
345
346 case TGSI_TEXTURE_CUBE:
347 return D0_SAMPLE_TYPE_CUBE;
348
349 default:
350 i915_program_error(p, "TexSrc type");
351 return 0;
352 }
353 }
354
355
356 /**
357 * Generate texel lookup instruction.
358 */
359 static void
360 emit_tex(struct i915_fp_compile *p,
361 const struct tgsi_full_instruction *inst,
362 uint opcode,
363 struct i915_fragment_shader* fs)
364 {
365 uint texture = inst->Texture.Texture;
366 uint unit = inst->Src[1].Register.Index;
367 uint tex = translate_tex_src_target( p, texture );
368 uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
369 uint coord = src_vector( p, &inst->Src[0], fs);
370
371 i915_emit_texld( p,
372 get_result_vector( p, &inst->Dst[0] ),
373 get_result_flags( inst ),
374 sampler,
375 coord,
376 opcode);
377 }
378
379
380 /**
381 * Generate a simple arithmetic instruction
382 * \param opcode the i915 opcode
383 * \param numArgs the number of input/src arguments
384 */
385 static void
386 emit_simple_arith(struct i915_fp_compile *p,
387 const struct tgsi_full_instruction *inst,
388 uint opcode, uint numArgs,
389 struct i915_fragment_shader* fs)
390 {
391 uint arg1, arg2, arg3;
392
393 assert(numArgs <= 3);
394
395 arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs );
396 arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs );
397 arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs );
398
399 i915_emit_arith( p,
400 opcode,
401 get_result_vector( p, &inst->Dst[0]),
402 get_result_flags( inst ), 0,
403 arg1,
404 arg2,
405 arg3 );
406 }
407
408
409 /** As above, but swap the first two src regs */
410 static void
411 emit_simple_arith_swap2(struct i915_fp_compile *p,
412 const struct tgsi_full_instruction *inst,
413 uint opcode, uint numArgs,
414 struct i915_fragment_shader* fs)
415 {
416 struct tgsi_full_instruction inst2;
417
418 assert(numArgs == 2);
419
420 /* transpose first two registers */
421 inst2 = *inst;
422 inst2.Src[0] = inst->Src[1];
423 inst2.Src[1] = inst->Src[0];
424
425 emit_simple_arith(p, &inst2, opcode, numArgs, fs);
426 }
427
428
429 #ifndef M_PI
430 #define M_PI 3.14159265358979323846
431 #endif
432
433 /*
434 * Translate TGSI instruction to i915 instruction.
435 *
436 * Possible concerns:
437 *
438 * SIN, COS -- could use another taylor step?
439 * LIT -- results seem a little different to sw mesa
440 * LOG -- different to mesa on negative numbers, but this is conformant.
441 */
442 static void
443 i915_translate_instruction(struct i915_fp_compile *p,
444 const struct tgsi_full_instruction *inst,
445 struct i915_fragment_shader *fs)
446 {
447 uint writemask;
448 uint src0, src1, src2, flags;
449 uint tmp = 0;
450
451 switch (inst->Instruction.Opcode) {
452 case TGSI_OPCODE_ABS:
453 src0 = src_vector(p, &inst->Src[0], fs);
454 i915_emit_arith(p,
455 A0_MAX,
456 get_result_vector(p, &inst->Dst[0]),
457 get_result_flags(inst), 0,
458 src0, negate(src0, 1, 1, 1, 1), 0);
459 break;
460
461 case TGSI_OPCODE_ADD:
462 emit_simple_arith(p, inst, A0_ADD, 2, fs);
463 break;
464
465 case TGSI_OPCODE_CMP:
466 src0 = src_vector(p, &inst->Src[0], fs);
467 src1 = src_vector(p, &inst->Src[1], fs);
468 src2 = src_vector(p, &inst->Src[2], fs);
469 i915_emit_arith(p, A0_CMP,
470 get_result_vector(p, &inst->Dst[0]),
471 get_result_flags(inst),
472 0, src0, src2, src1); /* NOTE: order of src2, src1 */
473 break;
474
475 case TGSI_OPCODE_COS:
476 src0 = src_vector(p, &inst->Src[0], fs);
477 tmp = i915_get_utemp(p);
478
479 i915_emit_arith(p,
480 A0_MUL,
481 tmp, A0_DEST_CHANNEL_X, 0,
482 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);
483
484 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
485
486 /* By choosing different taylor constants, could get rid of this mul:
487 */
488 i915_emit_arith(p,
489 A0_MUL,
490 tmp, A0_DEST_CHANNEL_X, 0,
491 tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0);
492
493 /*
494 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
495 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
496 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
497 * result = DP4 t0, cos_constants
498 */
499 i915_emit_arith(p,
500 A0_MUL,
501 tmp, A0_DEST_CHANNEL_XY, 0,
502 swizzle(tmp, X, X, ONE, ONE),
503 swizzle(tmp, X, ONE, ONE, ONE), 0);
504
505 i915_emit_arith(p,
506 A0_MUL,
507 tmp, A0_DEST_CHANNEL_XYZ, 0,
508 swizzle(tmp, X, Y, X, ONE),
509 swizzle(tmp, X, X, ONE, ONE), 0);
510
511 i915_emit_arith(p,
512 A0_MUL,
513 tmp, A0_DEST_CHANNEL_XYZ, 0,
514 swizzle(tmp, X, X, Z, ONE),
515 swizzle(tmp, Z, ONE, ONE, ONE), 0);
516
517 i915_emit_arith(p,
518 A0_DP4,
519 get_result_vector(p, &inst->Dst[0]),
520 get_result_flags(inst), 0,
521 swizzle(tmp, ONE, Z, Y, X),
522 i915_emit_const4fv(p, cos_constants), 0);
523 break;
524
525 case TGSI_OPCODE_DDX:
526 case TGSI_OPCODE_DDY:
527 /* XXX We just output 0 here */
528 debug_printf("Punting DDX/DDX\n");
529 src0 = get_result_vector(p, &inst->Dst[0]);
530 i915_emit_arith(p,
531 A0_MOV,
532 get_result_vector(p, &inst->Dst[0]),
533 get_result_flags(inst), 0,
534 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
535 break;
536
537 case TGSI_OPCODE_DP2:
538 src0 = src_vector(p, &inst->Src[0], fs);
539 src1 = src_vector(p, &inst->Src[1], fs);
540
541 i915_emit_arith(p,
542 A0_DP3,
543 get_result_vector(p, &inst->Dst[0]),
544 get_result_flags(inst), 0,
545 swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
546 break;
547
548 case TGSI_OPCODE_DP3:
549 emit_simple_arith(p, inst, A0_DP3, 2, fs);
550 break;
551
552 case TGSI_OPCODE_DP4:
553 emit_simple_arith(p, inst, A0_DP4, 2, fs);
554 break;
555
556 case TGSI_OPCODE_DPH:
557 src0 = src_vector(p, &inst->Src[0], fs);
558 src1 = src_vector(p, &inst->Src[1], fs);
559
560 i915_emit_arith(p,
561 A0_DP4,
562 get_result_vector(p, &inst->Dst[0]),
563 get_result_flags(inst), 0,
564 swizzle(src0, X, Y, Z, ONE), src1, 0);
565 break;
566
567 case TGSI_OPCODE_DST:
568 src0 = src_vector(p, &inst->Src[0], fs);
569 src1 = src_vector(p, &inst->Src[1], fs);
570
571 /* result[0] = 1 * 1;
572 * result[1] = a[1] * b[1];
573 * result[2] = a[2] * 1;
574 * result[3] = 1 * b[3];
575 */
576 i915_emit_arith(p,
577 A0_MUL,
578 get_result_vector(p, &inst->Dst[0]),
579 get_result_flags(inst), 0,
580 swizzle(src0, ONE, Y, Z, ONE),
581 swizzle(src1, ONE, Y, ONE, W), 0);
582 break;
583
584 case TGSI_OPCODE_END:
585 /* no-op */
586 break;
587
588 case TGSI_OPCODE_EX2:
589 src0 = src_vector(p, &inst->Src[0], fs);
590
591 i915_emit_arith(p,
592 A0_EXP,
593 get_result_vector(p, &inst->Dst[0]),
594 get_result_flags(inst), 0,
595 swizzle(src0, X, X, X, X), 0, 0);
596 break;
597
598 case TGSI_OPCODE_FLR:
599 emit_simple_arith(p, inst, A0_FLR, 1, fs);
600 break;
601
602 case TGSI_OPCODE_FRC:
603 emit_simple_arith(p, inst, A0_FRC, 1, fs);
604 break;
605
606 case TGSI_OPCODE_KIL:
607 /* kill if src[0].x < 0 || src[0].y < 0 ... */
608 src0 = src_vector(p, &inst->Src[0], fs);
609 tmp = i915_get_utemp(p);
610
611 i915_emit_texld(p,
612 tmp, /* dest reg: a dummy reg */
613 A0_DEST_CHANNEL_ALL, /* dest writemask */
614 0, /* sampler */
615 src0, /* coord*/
616 T0_TEXKILL); /* opcode */
617 break;
618
619 case TGSI_OPCODE_KILP:
620 assert(0); /* not tested yet */
621 break;
622
623 case TGSI_OPCODE_LG2:
624 src0 = src_vector(p, &inst->Src[0], fs);
625
626 i915_emit_arith(p,
627 A0_LOG,
628 get_result_vector(p, &inst->Dst[0]),
629 get_result_flags(inst), 0,
630 swizzle(src0, X, X, X, X), 0, 0);
631 break;
632
633 case TGSI_OPCODE_LIT:
634 src0 = src_vector(p, &inst->Src[0], fs);
635 tmp = i915_get_utemp(p);
636
637 /* tmp = max( a.xyzw, a.00zw )
638 * XXX: Clamp tmp.w to -128..128
639 * tmp.y = log(tmp.y)
640 * tmp.y = tmp.w * tmp.y
641 * tmp.y = exp(tmp.y)
642 * result = cmp (a.11-x1, a.1x01, a.1xy1 )
643 */
644 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
645 src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
646
647 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
648 swizzle(tmp, Y, Y, Y, Y), 0, 0);
649
650 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
651 swizzle(tmp, ZERO, Y, ZERO, ZERO),
652 swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
653
654 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
655 swizzle(tmp, Y, Y, Y, Y), 0, 0);
656
657 i915_emit_arith(p, A0_CMP,
658 get_result_vector(p, &inst->Dst[0]),
659 get_result_flags(inst), 0,
660 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
661 swizzle(tmp, ONE, X, ZERO, ONE),
662 swizzle(tmp, ONE, X, Y, ONE));
663
664 break;
665
666 case TGSI_OPCODE_LRP:
667 src0 = src_vector(p, &inst->Src[0], fs);
668 src1 = src_vector(p, &inst->Src[1], fs);
669 src2 = src_vector(p, &inst->Src[2], fs);
670 flags = get_result_flags(inst);
671 tmp = i915_get_utemp(p);
672
673 /* b*a + c*(1-a)
674 *
675 * b*a + c - ca
676 *
677 * tmp = b*a + c,
678 * result = (-c)*a + tmp
679 */
680 i915_emit_arith(p, A0_MAD, tmp,
681 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
682
683 i915_emit_arith(p, A0_MAD,
684 get_result_vector(p, &inst->Dst[0]),
685 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
686 break;
687
688 case TGSI_OPCODE_MAD:
689 emit_simple_arith(p, inst, A0_MAD, 3, fs);
690 break;
691
692 case TGSI_OPCODE_MAX:
693 emit_simple_arith(p, inst, A0_MAX, 2, fs);
694 break;
695
696 case TGSI_OPCODE_MIN:
697 src0 = src_vector(p, &inst->Src[0], fs);
698 src1 = src_vector(p, &inst->Src[1], fs);
699 tmp = i915_get_utemp(p);
700 flags = get_result_flags(inst);
701
702 i915_emit_arith(p,
703 A0_MAX,
704 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
705 negate(src0, 1, 1, 1, 1),
706 negate(src1, 1, 1, 1, 1), 0);
707
708 i915_emit_arith(p,
709 A0_MOV,
710 get_result_vector(p, &inst->Dst[0]),
711 flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
712 break;
713
714 case TGSI_OPCODE_MOV:
715 emit_simple_arith(p, inst, A0_MOV, 1, fs);
716 break;
717
718 case TGSI_OPCODE_MUL:
719 emit_simple_arith(p, inst, A0_MUL, 2, fs);
720 break;
721
722 case TGSI_OPCODE_POW:
723 src0 = src_vector(p, &inst->Src[0], fs);
724 src1 = src_vector(p, &inst->Src[1], fs);
725 tmp = i915_get_utemp(p);
726 flags = get_result_flags(inst);
727
728 /* XXX: masking on intermediate values, here and elsewhere.
729 */
730 i915_emit_arith(p,
731 A0_LOG,
732 tmp, A0_DEST_CHANNEL_X, 0,
733 swizzle(src0, X, X, X, X), 0, 0);
734
735 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
736
737 i915_emit_arith(p,
738 A0_EXP,
739 get_result_vector(p, &inst->Dst[0]),
740 flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
741 break;
742
743 case TGSI_OPCODE_RET:
744 /* XXX: no-op? */
745 break;
746
747 case TGSI_OPCODE_RCP:
748 src0 = src_vector(p, &inst->Src[0], fs);
749
750 i915_emit_arith(p,
751 A0_RCP,
752 get_result_vector(p, &inst->Dst[0]),
753 get_result_flags(inst), 0,
754 swizzle(src0, X, X, X, X), 0, 0);
755 break;
756
757 case TGSI_OPCODE_RSQ:
758 src0 = src_vector(p, &inst->Src[0], fs);
759
760 i915_emit_arith(p,
761 A0_RSQ,
762 get_result_vector(p, &inst->Dst[0]),
763 get_result_flags(inst), 0,
764 swizzle(src0, X, X, X, X), 0, 0);
765 break;
766
767 case TGSI_OPCODE_SCS:
768 src0 = src_vector(p, &inst->Src[0], fs);
769 tmp = i915_get_utemp(p);
770
771 /*
772 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
773 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
774 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
775 * scs.x = DP4 t1, sin_constants
776 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
777 * scs.y = DP4 t1, cos_constants
778 */
779 i915_emit_arith(p,
780 A0_MUL,
781 tmp, A0_DEST_CHANNEL_XY, 0,
782 swizzle(src0, X, X, ONE, ONE),
783 swizzle(src0, X, ONE, ONE, ONE), 0);
784
785 i915_emit_arith(p,
786 A0_MUL,
787 tmp, A0_DEST_CHANNEL_ALL, 0,
788 swizzle(tmp, X, Y, X, Y),
789 swizzle(tmp, X, X, ONE, ONE), 0);
790
791 writemask = inst->Dst[0].Register.WriteMask;
792
793 if (writemask & TGSI_WRITEMASK_Y) {
794 uint tmp1;
795
796 if (writemask & TGSI_WRITEMASK_X)
797 tmp1 = i915_get_utemp(p);
798 else
799 tmp1 = tmp;
800
801 i915_emit_arith(p,
802 A0_MUL,
803 tmp1, A0_DEST_CHANNEL_ALL, 0,
804 swizzle(tmp, X, Y, Y, W),
805 swizzle(tmp, X, Z, ONE, ONE), 0);
806
807 i915_emit_arith(p,
808 A0_DP4,
809 get_result_vector(p, &inst->Dst[0]),
810 A0_DEST_CHANNEL_Y, 0,
811 swizzle(tmp1, W, Z, Y, X),
812 i915_emit_const4fv(p, sin_constants), 0);
813 }
814
815 if (writemask & TGSI_WRITEMASK_X) {
816 i915_emit_arith(p,
817 A0_MUL,
818 tmp, A0_DEST_CHANNEL_XYZ, 0,
819 swizzle(tmp, X, X, Z, ONE),
820 swizzle(tmp, Z, ONE, ONE, ONE), 0);
821
822 i915_emit_arith(p,
823 A0_DP4,
824 get_result_vector(p, &inst->Dst[0]),
825 A0_DEST_CHANNEL_X, 0,
826 swizzle(tmp, ONE, Z, Y, X),
827 i915_emit_const4fv(p, cos_constants), 0);
828 }
829 break;
830
831 case TGSI_OPCODE_SEQ:
832 /* if we're both >= and <= then we're == */
833 src0 = src_vector(p, &inst->Src[0], fs);
834 src1 = src_vector(p, &inst->Src[1], fs);
835 tmp = i915_get_utemp(p);
836
837 i915_emit_arith(p,
838 A0_SGE,
839 tmp, A0_DEST_CHANNEL_ALL, 0,
840 src0,
841 src1, 0);
842
843 i915_emit_arith(p,
844 A0_SGE,
845 get_result_vector(p, &inst->Dst[0]),
846 A0_DEST_CHANNEL_ALL, 0,
847 src1,
848 src0, 0);
849
850 i915_emit_arith(p,
851 A0_MUL,
852 get_result_vector(p, &inst->Dst[0]),
853 A0_DEST_CHANNEL_ALL, 0,
854 get_result_vector(p, &inst->Dst[0]),
855 tmp, 0);
856
857 break;
858
859 case TGSI_OPCODE_SGE:
860 emit_simple_arith(p, inst, A0_SGE, 2, fs);
861 break;
862
863 case TGSI_OPCODE_SIN:
864 src0 = src_vector(p, &inst->Src[0], fs);
865 tmp = i915_get_utemp(p);
866
867 i915_emit_arith(p,
868 A0_MUL,
869 tmp, A0_DEST_CHANNEL_X, 0,
870 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);
871
872 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
873
874 /* By choosing different taylor constants, could get rid of this mul:
875 */
876 i915_emit_arith(p,
877 A0_MUL,
878 tmp, A0_DEST_CHANNEL_X, 0,
879 tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0);
880
881 /*
882 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
883 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
884 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
885 * result = DP4 t1.wzyx, sin_constants
886 */
887 i915_emit_arith(p,
888 A0_MUL,
889 tmp, A0_DEST_CHANNEL_XY, 0,
890 swizzle(tmp, X, X, ONE, ONE),
891 swizzle(tmp, X, ONE, ONE, ONE), 0);
892
893 i915_emit_arith(p,
894 A0_MUL,
895 tmp, A0_DEST_CHANNEL_ALL, 0,
896 swizzle(tmp, X, Y, X, Y),
897 swizzle(tmp, X, X, ONE, ONE), 0);
898
899 i915_emit_arith(p,
900 A0_MUL,
901 tmp, A0_DEST_CHANNEL_ALL, 0,
902 swizzle(tmp, X, Y, Y, W),
903 swizzle(tmp, X, Z, ONE, ONE), 0);
904
905 i915_emit_arith(p,
906 A0_DP4,
907 get_result_vector(p, &inst->Dst[0]),
908 get_result_flags(inst), 0,
909 swizzle(tmp, W, Z, Y, X),
910 i915_emit_const4fv(p, sin_constants), 0);
911 break;
912
913 case TGSI_OPCODE_SLE:
914 /* like SGE, but swap reg0, reg1 */
915 emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
916 break;
917
918 case TGSI_OPCODE_SLT:
919 emit_simple_arith(p, inst, A0_SLT, 2, fs);
920 break;
921
922 case TGSI_OPCODE_SGT:
923 /* like SLT, but swap reg0, reg1 */
924 emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
925 break;
926
927 case TGSI_OPCODE_SNE:
928 /* if we're < or > then we're != */
929 src0 = src_vector(p, &inst->Src[0], fs);
930 src1 = src_vector(p, &inst->Src[1], fs);
931 tmp = i915_get_utemp(p);
932
933 i915_emit_arith(p,
934 A0_SLT,
935 tmp,
936 A0_DEST_CHANNEL_ALL, 0,
937 src0,
938 src1, 0);
939
940 i915_emit_arith(p,
941 A0_SLT,
942 get_result_vector(p, &inst->Dst[0]),
943 A0_DEST_CHANNEL_ALL, 0,
944 src1,
945 src0, 0);
946
947 i915_emit_arith(p,
948 A0_ADD,
949 get_result_vector(p, &inst->Dst[0]),
950 A0_DEST_CHANNEL_ALL, 0,
951 get_result_vector(p, &inst->Dst[0]),
952 tmp, 0);
953 break;
954
955 case TGSI_OPCODE_SSG:
956 /* compute (src>0) - (src<0) */
957 src0 = src_vector(p, &inst->Src[0], fs);
958 tmp = i915_get_utemp(p);
959
960 i915_emit_arith(p,
961 A0_SLT,
962 tmp,
963 A0_DEST_CHANNEL_ALL, 0,
964 src0,
965 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);
966
967 i915_emit_arith(p,
968 A0_SLT,
969 get_result_vector(p, &inst->Dst[0]),
970 A0_DEST_CHANNEL_ALL, 0,
971 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
972 src0, 0);
973
974 i915_emit_arith(p,
975 A0_ADD,
976 get_result_vector(p, &inst->Dst[0]),
977 A0_DEST_CHANNEL_ALL, 0,
978 get_result_vector(p, &inst->Dst[0]),
979 negate(tmp, 1, 1, 1, 1), 0);
980 break;
981
982 case TGSI_OPCODE_SUB:
983 src0 = src_vector(p, &inst->Src[0], fs);
984 src1 = src_vector(p, &inst->Src[1], fs);
985
986 i915_emit_arith(p,
987 A0_ADD,
988 get_result_vector(p, &inst->Dst[0]),
989 get_result_flags(inst), 0,
990 src0, negate(src1, 1, 1, 1, 1), 0);
991 break;
992
993 case TGSI_OPCODE_TEX:
994 emit_tex(p, inst, T0_TEXLD, fs);
995 break;
996
997 case TGSI_OPCODE_TRUNC:
998 emit_simple_arith(p, inst, A0_TRC, 1, fs);
999 break;
1000
1001 case TGSI_OPCODE_TXB:
1002 emit_tex(p, inst, T0_TEXLDB, fs);
1003 break;
1004
1005 case TGSI_OPCODE_TXP:
1006 emit_tex(p, inst, T0_TEXLDP, fs);
1007 break;
1008
1009 case TGSI_OPCODE_XPD:
1010 /* Cross product:
1011 * result.x = src0.y * src1.z - src0.z * src1.y;
1012 * result.y = src0.z * src1.x - src0.x * src1.z;
1013 * result.z = src0.x * src1.y - src0.y * src1.x;
1014 * result.w = undef;
1015 */
1016 src0 = src_vector(p, &inst->Src[0], fs);
1017 src1 = src_vector(p, &inst->Src[1], fs);
1018 tmp = i915_get_utemp(p);
1019
1020 i915_emit_arith(p,
1021 A0_MUL,
1022 tmp, A0_DEST_CHANNEL_ALL, 0,
1023 swizzle(src0, Z, X, Y, ONE),
1024 swizzle(src1, Y, Z, X, ONE), 0);
1025
1026 i915_emit_arith(p,
1027 A0_MAD,
1028 get_result_vector(p, &inst->Dst[0]),
1029 get_result_flags(inst), 0,
1030 swizzle(src0, Y, Z, X, ONE),
1031 swizzle(src1, Z, X, Y, ONE),
1032 negate(tmp, 1, 1, 1, 0));
1033 break;
1034
1035 default:
1036 i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
1037 p->error = 1;
1038 return;
1039 }
1040
1041 i915_release_utemps(p);
1042 }
1043
1044
1045 /**
1046 * Translate TGSI fragment shader into i915 hardware instructions.
1047 * \param p the translation state
1048 * \param tokens the TGSI token array
1049 */
1050 static void
1051 i915_translate_instructions(struct i915_fp_compile *p,
1052 const struct tgsi_token *tokens,
1053 struct i915_fragment_shader *fs)
1054 {
1055 struct i915_fragment_shader *ifs = p->shader;
1056 struct tgsi_parse_context parse;
1057
1058 tgsi_parse_init( &parse, tokens );
1059
1060 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1061
1062 tgsi_parse_token( &parse );
1063
1064 switch( parse.FullToken.Token.Type ) {
1065 case TGSI_TOKEN_TYPE_PROPERTY:
1066 /*
1067 * We only support one cbuf, but we still need to ignore the property
1068 * correctly so we don't hit the assert at the end of the switch case.
1069 */
1070 assert(parse.FullToken.FullProperty.Property.PropertyName ==
1071 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
1072 break;
1073 case TGSI_TOKEN_TYPE_DECLARATION:
1074 if (parse.FullToken.FullDeclaration.Declaration.File
1075 == TGSI_FILE_CONSTANT) {
1076 uint i;
1077 for (i = parse.FullToken.FullDeclaration.Range.First;
1078 i <= parse.FullToken.FullDeclaration.Range.Last;
1079 i++) {
1080 assert(ifs->constant_flags[i] == 0x0);
1081 ifs->constant_flags[i] = I915_CONSTFLAG_USER;
1082 ifs->num_constants = MAX2(ifs->num_constants, i + 1);
1083 }
1084 }
1085 else if (parse.FullToken.FullDeclaration.Declaration.File
1086 == TGSI_FILE_TEMPORARY) {
1087 uint i;
1088 for (i = parse.FullToken.FullDeclaration.Range.First;
1089 i <= parse.FullToken.FullDeclaration.Range.Last;
1090 i++) {
1091 assert(i < I915_MAX_TEMPORARY);
1092 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
1093 p->temp_flag |= (1 << i); /* mark temp as used */
1094 }
1095 }
1096 break;
1097
1098 case TGSI_TOKEN_TYPE_IMMEDIATE:
1099 {
1100 const struct tgsi_full_immediate *imm
1101 = &parse.FullToken.FullImmediate;
1102 const uint pos = p->num_immediates++;
1103 uint j;
1104 assert( imm->Immediate.NrTokens <= 4 + 1 );
1105 for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
1106 p->immediates[pos][j] = imm->u[j].Float;
1107 }
1108 }
1109 break;
1110
1111 case TGSI_TOKEN_TYPE_INSTRUCTION:
1112 if (p->first_instruction) {
1113 /* resolve location of immediates */
1114 uint i, j;
1115 for (i = 0; i < p->num_immediates; i++) {
1116 /* find constant slot for this immediate */
1117 for (j = 0; j < I915_MAX_CONSTANT; j++) {
1118 if (ifs->constant_flags[j] == 0x0) {
1119 memcpy(ifs->constants[j],
1120 p->immediates[i],
1121 4 * sizeof(float));
1122 /*printf("immediate %d maps to const %d\n", i, j);*/
1123 ifs->constant_flags[j] = 0xf; /* all four comps used */
1124 p->immediates_map[i] = j;
1125 ifs->num_constants = MAX2(ifs->num_constants, j + 1);
1126 break;
1127 }
1128 }
1129 }
1130
1131 p->first_instruction = FALSE;
1132 }
1133
1134 i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs);
1135 break;
1136
1137 default:
1138 assert( 0 );
1139 }
1140
1141 } /* while */
1142
1143 tgsi_parse_free (&parse);
1144 }
1145
1146
1147 static struct i915_fp_compile *
1148 i915_init_compile(struct i915_context *i915,
1149 struct i915_fragment_shader *ifs)
1150 {
1151 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
1152 int i;
1153
1154 p->shader = ifs;
1155
1156 /* Put new constants at end of const buffer, growing downward.
1157 * The problem is we don't know how many user-defined constants might
1158 * be specified with pipe->set_constant_buffer().
1159 * Should pre-scan the user's program to determine the highest-numbered
1160 * constant referenced.
1161 */
1162 ifs->num_constants = 0;
1163 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
1164
1165 for (i = 0; i < I915_TEX_UNITS; i++)
1166 ifs->generic_mapping[i] = -1;
1167
1168 p->first_instruction = TRUE;
1169
1170 p->nr_tex_indirect = 1; /* correct? */
1171 p->nr_tex_insn = 0;
1172 p->nr_alu_insn = 0;
1173 p->nr_decl_insn = 0;
1174
1175 p->csr = p->program;
1176 p->decl = p->declarations;
1177 p->decl_s = 0;
1178 p->decl_t = 0;
1179 p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
1180 p->utemp_flag = ~0x7;
1181
1182 /* initialize the first program word */
1183 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
1184
1185 return p;
1186 }
1187
1188
1189 /* Copy compile results to the fragment program struct and destroy the
1190 * compilation context.
1191 */
1192 static void
1193 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
1194 {
1195 struct i915_fragment_shader *ifs = p->shader;
1196 unsigned long program_size = (unsigned long) (p->csr - p->program);
1197 unsigned long decl_size = (unsigned long) (p->decl - p->declarations);
1198
1199 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
1200 i915_program_error(p, "Exceeded max nr indirect texture lookups");
1201
1202 if (p->nr_tex_insn > I915_MAX_TEX_INSN)
1203 i915_program_error(p, "Exceeded max TEX instructions");
1204
1205 if (p->nr_alu_insn > I915_MAX_ALU_INSN)
1206 i915_program_error(p, "Exceeded max ALU instructions");
1207
1208 if (p->nr_decl_insn > I915_MAX_DECL_INSN)
1209 i915_program_error(p, "Exceeded max DECL instructions");
1210
1211 if (p->error) {
1212 p->NumNativeInstructions = 0;
1213 p->NumNativeAluInstructions = 0;
1214 p->NumNativeTexInstructions = 0;
1215 p->NumNativeTexIndirections = 0;
1216
1217 i915_use_passthrough_shader(ifs);
1218 }
1219 else {
1220 p->NumNativeInstructions
1221 = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
1222 p->NumNativeAluInstructions = p->nr_alu_insn;
1223 p->NumNativeTexInstructions = p->nr_tex_insn;
1224 p->NumNativeTexIndirections = p->nr_tex_indirect;
1225
1226 /* patch in the program length */
1227 p->declarations[0] |= program_size + decl_size - 2;
1228
1229 /* Copy compilation results to fragment program struct:
1230 */
1231 assert(!ifs->program);
1232 ifs->program
1233 = (uint *) MALLOC((program_size + decl_size) * sizeof(uint));
1234 if (ifs->program) {
1235 ifs->program_len = program_size + decl_size;
1236
1237 memcpy(ifs->program,
1238 p->declarations,
1239 decl_size * sizeof(uint));
1240
1241 memcpy(ifs->program + decl_size,
1242 p->program,
1243 program_size * sizeof(uint));
1244 }
1245 }
1246
1247 /* Release the compilation struct:
1248 */
1249 FREE(p);
1250 }
1251
1252
1253
1254
1255
1256 /**
1257 * Rather than trying to intercept and jiggle depth writes during
1258 * emit, just move the value into its correct position at the end of
1259 * the program:
1260 */
1261 static void
1262 i915_fixup_depth_write(struct i915_fp_compile *p)
1263 {
1264 /* XXX assuming pos/depth is always in output[0] */
1265 if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
1266 const uint depth = UREG(REG_TYPE_OD, 0);
1267
1268 i915_emit_arith(p,
1269 A0_MOV, /* opcode */
1270 depth, /* dest reg */
1271 A0_DEST_CHANNEL_W, /* write mask */
1272 0, /* saturate? */
1273 swizzle(depth, X, Y, Z, Z), /* src0 */
1274 0, 0 /* src1, src2 */);
1275 }
1276 }
1277
1278
1279 void
1280 i915_translate_fragment_program( struct i915_context *i915,
1281 struct i915_fragment_shader *fs)
1282 {
1283 struct i915_fp_compile *p;
1284 const struct tgsi_token *tokens = fs->state.tokens;
1285
1286 #if 0
1287 tgsi_dump(tokens, 0);
1288 #endif
1289
1290 /* hw doesn't seem to like empty frag programs, even when the depth write
1291 * fixup gets emitted below - may that one is fishy, too? */
1292 if (fs->info.num_instructions == 1) {
1293 i915_use_passthrough_shader(fs);
1294
1295 return;
1296 }
1297
1298 p = i915_init_compile(i915, fs);
1299
1300 i915_translate_instructions(p, tokens, fs);
1301 i915_fixup_depth_write(p);
1302
1303 i915_fini_compile(i915, p);
1304 }