merge of glsl-compiler-1 branch
[mesa.git] / src / mesa / drivers / dri / i915 / i915_fragprog.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "glheader.h"
29 #include "macros.h"
30 #include "enums.h"
31
32 #include "tnl/tnl.h"
33 #include "tnl/t_context.h"
34 #include "intel_batchbuffer.h"
35
36 #include "i915_reg.h"
37 #include "i915_context.h"
38 #include "i915_program.h"
39
40 #include "prog_instruction.h"
41 #include "prog_parameter.h"
42 #include "program.h"
43 #include "programopt.h"
44
45
46
47 /* 1, -1/3!, 1/5!, -1/7! */
48 static const GLfloat sin_constants[4] = { 1.0,
49 -1.0/(3*2*1),
50 1.0/(5*4*3*2*1),
51 -1.0/(7*6*5*4*3*2*1) };
52
53 /* 1, -1/2!, 1/4!, -1/6! */
54 static const GLfloat cos_constants[4] = { 1.0,
55 -1.0/(2*1),
56 1.0/(4*3*2*1),
57 -1.0/(6*5*4*3*2*1) };
58
59 /**
60 * Retrieve a ureg for the given source register. Will emit
61 * constants, apply swizzling and negation as needed.
62 */
63 static GLuint src_vector( struct i915_fragment_program *p,
64 const struct prog_src_register *source,
65 const struct gl_fragment_program *program )
66 {
67 GLuint src;
68
69 switch (source->File) {
70
71 /* Registers:
72 */
73 case PROGRAM_TEMPORARY:
74 if (source->Index >= I915_MAX_TEMPORARY) {
75 i915_program_error( p, "Exceeded max temporary reg" );
76 return 0;
77 }
78 src = UREG( REG_TYPE_R, source->Index );
79 break;
80 case PROGRAM_INPUT:
81 switch (source->Index) {
82 case FRAG_ATTRIB_WPOS:
83 src = i915_emit_decl( p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL );
84 break;
85 case FRAG_ATTRIB_COL0:
86 src = i915_emit_decl( p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL );
87 break;
88 case FRAG_ATTRIB_COL1:
89 src = i915_emit_decl( p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ );
90 src = swizzle( src, X, Y, Z, ONE );
91 break;
92 case FRAG_ATTRIB_FOGC:
93 src = i915_emit_decl( p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W );
94 src = swizzle( src, W, W, W, W );
95 break;
96 case FRAG_ATTRIB_TEX0:
97 case FRAG_ATTRIB_TEX1:
98 case FRAG_ATTRIB_TEX2:
99 case FRAG_ATTRIB_TEX3:
100 case FRAG_ATTRIB_TEX4:
101 case FRAG_ATTRIB_TEX5:
102 case FRAG_ATTRIB_TEX6:
103 case FRAG_ATTRIB_TEX7:
104 src = i915_emit_decl( p, REG_TYPE_T,
105 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
106 D0_CHANNEL_ALL );
107 break;
108
109 default:
110 i915_program_error( p, "Bad source->Index" );
111 return 0;
112 }
113 break;
114
115 /* Various paramters and env values. All emitted to
116 * hardware as program constants.
117 */
118 case PROGRAM_LOCAL_PARAM:
119 src = i915_emit_param4fv(
120 p, program->Base.LocalParams[source->Index]);
121 break;
122
123 case PROGRAM_ENV_PARAM:
124 src = i915_emit_param4fv(
125 p, p->ctx->FragmentProgram.Parameters[source->Index]);
126 break;
127
128 case PROGRAM_CONSTANT:
129 case PROGRAM_STATE_VAR:
130 case PROGRAM_NAMED_PARAM:
131 src = i915_emit_param4fv(
132 p, program->Base.Parameters->ParameterValues[source->Index] );
133 break;
134
135 default:
136 i915_program_error( p, "Bad source->File" );
137 return 0;
138 }
139
140 src = swizzle(src,
141 GET_SWZ(source->Swizzle, 0),
142 GET_SWZ(source->Swizzle, 1),
143 GET_SWZ(source->Swizzle, 2),
144 GET_SWZ(source->Swizzle, 3));
145
146 if (source->NegateBase)
147 src = negate( src,
148 GET_BIT(source->NegateBase, 0),
149 GET_BIT(source->NegateBase, 1),
150 GET_BIT(source->NegateBase, 2),
151 GET_BIT(source->NegateBase, 3));
152
153 return src;
154 }
155
156
157 static GLuint get_result_vector( struct i915_fragment_program *p,
158 const struct prog_instruction *inst )
159 {
160 switch (inst->DstReg.File) {
161 case PROGRAM_OUTPUT:
162 switch (inst->DstReg.Index) {
163 case FRAG_RESULT_COLR:
164 return UREG(REG_TYPE_OC, 0);
165 case FRAG_RESULT_DEPR:
166 p->depth_written = 1;
167 return UREG(REG_TYPE_OD, 0);
168 default:
169 i915_program_error( p, "Bad inst->DstReg.Index" );
170 return 0;
171 }
172 case PROGRAM_TEMPORARY:
173 return UREG(REG_TYPE_R, inst->DstReg.Index);
174 default:
175 i915_program_error( p, "Bad inst->DstReg.File" );
176 return 0;
177 }
178 }
179
180 static GLuint get_result_flags( const struct prog_instruction *inst )
181 {
182 GLuint flags = 0;
183
184 if (inst->SaturateMode == SATURATE_ZERO_ONE) flags |= A0_DEST_SATURATE;
185 if (inst->DstReg.WriteMask & WRITEMASK_X) flags |= A0_DEST_CHANNEL_X;
186 if (inst->DstReg.WriteMask & WRITEMASK_Y) flags |= A0_DEST_CHANNEL_Y;
187 if (inst->DstReg.WriteMask & WRITEMASK_Z) flags |= A0_DEST_CHANNEL_Z;
188 if (inst->DstReg.WriteMask & WRITEMASK_W) flags |= A0_DEST_CHANNEL_W;
189
190 return flags;
191 }
192
193 static GLuint translate_tex_src_target( struct i915_fragment_program *p,
194 GLubyte bit )
195 {
196 switch (bit) {
197 case TEXTURE_1D_INDEX: return D0_SAMPLE_TYPE_2D;
198 case TEXTURE_2D_INDEX: return D0_SAMPLE_TYPE_2D;
199 case TEXTURE_RECT_INDEX: return D0_SAMPLE_TYPE_2D;
200 case TEXTURE_3D_INDEX: return D0_SAMPLE_TYPE_VOLUME;
201 case TEXTURE_CUBE_INDEX: return D0_SAMPLE_TYPE_CUBE;
202 default: i915_program_error(p, "TexSrcBit"); return 0;
203 }
204 }
205
/* Instruction-emission helpers for upload_program().
 *
 * Both macros deliberately capture three names from the expansion site:
 *   p        - struct i915_fragment_program * being built
 *   inst     - the prog_instruction currently being translated
 *   program  - the gl_fragment_program that inst belongs to
 */

/* Declare the sampler for inst's texture unit/target and emit a texture
 * instruction OP (one of the T0_* opcodes) that samples it at SrcReg[0].
 */
#define EMIT_TEX( OP )                                                  \
do {                                                                    \
   GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget );      \
   GLuint sampler = i915_emit_decl(p, REG_TYPE_S,                       \
                                   inst->TexSrcUnit, dim);              \
   GLuint coord = src_vector( p, &inst->SrcReg[0], program);            \
   /* Texel lookup */                                                   \
                                                                        \
   i915_emit_texld( p,                                                  \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ),                           \
                    sampler,                                            \
                    coord,                                              \
                    (OP));                                              \
} while (0)

/* Emit arithmetic instruction OP using the first N (0..3) source
 * registers of inst; unused source slots are passed as 0.
 * N is parenthesized so that any constant expression may be supplied.
 */
#define EMIT_ARITH( OP, N )                                             \
do {                                                                    \
   i915_emit_arith( p,                                                  \
                    (OP),                                               \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ), 0,                        \
                    ((N) < 1) ? 0 : src_vector( p, &inst->SrcReg[0], program), \
                    ((N) < 2) ? 0 : src_vector( p, &inst->SrcReg[1], program), \
                    ((N) < 3) ? 0 : src_vector( p, &inst->SrcReg[2], program)); \
} while (0)

#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
236
237
238 /* Possible concerns:
239 *
240 * SIN, COS -- could use another taylor step?
241 * LIT -- results seem a little different to sw mesa
242 * LOG -- different to mesa on negative numbers, but this is conformant.
243 *
244 * Parse failures -- Mesa doesn't currently give a good indication
245 * internally whether a particular program string parsed or not. This
246 * can lead to confusion -- hopefully we cope with it ok now.
247 *
248 */
249 static void upload_program( struct i915_fragment_program *p )
250 {
251 const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
252 const struct prog_instruction *inst = program->Base.Instructions;
253
254 /* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */
255
256 /* Is this a parse-failed program? Ensure a valid program is
257 * loaded, as the flagging of an error isn't sufficient to stop
258 * this being uploaded to hardware.
259 */
260 if (inst[0].Opcode == OPCODE_END) {
261 GLuint tmp = i915_get_utemp( p );
262 i915_emit_arith( p,
263 A0_MOV,
264 UREG(REG_TYPE_OC, 0),
265 A0_DEST_CHANNEL_ALL, 0,
266 swizzle(tmp,ONE,ZERO,ONE,ONE), 0, 0);
267 return;
268 }
269
270 while (1) {
271 GLuint src0, src1, src2, flags;
272 GLuint tmp = 0;
273
274 switch (inst->Opcode) {
275 case OPCODE_ABS:
276 src0 = src_vector( p, &inst->SrcReg[0], program);
277 i915_emit_arith( p,
278 A0_MAX,
279 get_result_vector( p, inst ),
280 get_result_flags( inst ), 0,
281 src0, negate(src0, 1,1,1,1), 0);
282 break;
283
284 case OPCODE_ADD:
285 EMIT_2ARG_ARITH( A0_ADD );
286 break;
287
288 case OPCODE_CMP:
289 src0 = src_vector( p, &inst->SrcReg[0], program);
290 src1 = src_vector( p, &inst->SrcReg[1], program);
291 src2 = src_vector( p, &inst->SrcReg[2], program);
292 i915_emit_arith( p,
293 A0_CMP,
294 get_result_vector( p, inst ),
295 get_result_flags( inst ), 0,
296 src0, src2, src1); /* NOTE: order of src2, src1 */
297 break;
298
299 case OPCODE_COS:
300 src0 = src_vector( p, &inst->SrcReg[0], program);
301 tmp = i915_get_utemp( p );
302
303 i915_emit_arith( p,
304 A0_MUL,
305 tmp, A0_DEST_CHANNEL_X, 0,
306 src0,
307 i915_emit_const1f(p, 1.0/(M_PI * 2)),
308 0);
309
310 i915_emit_arith( p,
311 A0_MOD,
312 tmp, A0_DEST_CHANNEL_X, 0,
313 tmp,
314 0, 0 );
315
316 /* By choosing different taylor constants, could get rid of this mul:
317 */
318 i915_emit_arith( p,
319 A0_MUL,
320 tmp, A0_DEST_CHANNEL_X, 0,
321 tmp,
322 i915_emit_const1f(p, (M_PI * 2)),
323 0);
324
325 /*
326 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
327 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
328 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
329 * result = DP4 t0, cos_constants
330 */
331 i915_emit_arith( p,
332 A0_MUL,
333 tmp, A0_DEST_CHANNEL_XY, 0,
334 swizzle(tmp, X,X,ONE,ONE),
335 swizzle(tmp, X,ONE,ONE,ONE), 0);
336
337 i915_emit_arith( p,
338 A0_MUL,
339 tmp, A0_DEST_CHANNEL_XYZ, 0,
340 swizzle(tmp, X,Y,X,ONE),
341 swizzle(tmp, X,X,ONE,ONE), 0);
342
343 i915_emit_arith( p,
344 A0_MUL,
345 tmp, A0_DEST_CHANNEL_XYZ, 0,
346 swizzle(tmp, X,X,Z,ONE),
347 swizzle(tmp, Z,ONE,ONE,ONE), 0);
348
349 i915_emit_arith( p,
350 A0_DP4,
351 get_result_vector( p, inst ),
352 get_result_flags( inst ), 0,
353 swizzle(tmp, ONE,Z,Y,X),
354 i915_emit_const4fv( p, cos_constants ), 0);
355
356 break;
357
358 case OPCODE_DP3:
359 EMIT_2ARG_ARITH( A0_DP3 );
360 break;
361
362 case OPCODE_DP4:
363 EMIT_2ARG_ARITH( A0_DP4 );
364 break;
365
366 case OPCODE_DPH:
367 src0 = src_vector( p, &inst->SrcReg[0], program);
368 src1 = src_vector( p, &inst->SrcReg[1], program);
369
370 i915_emit_arith( p,
371 A0_DP4,
372 get_result_vector( p, inst ),
373 get_result_flags( inst ), 0,
374 swizzle(src0, X,Y,Z,ONE), src1, 0);
375 break;
376
377 case OPCODE_DST:
378 src0 = src_vector( p, &inst->SrcReg[0], program);
379 src1 = src_vector( p, &inst->SrcReg[1], program);
380
381 /* result[0] = 1 * 1;
382 * result[1] = a[1] * b[1];
383 * result[2] = a[2] * 1;
384 * result[3] = 1 * b[3];
385 */
386 i915_emit_arith( p,
387 A0_MUL,
388 get_result_vector( p, inst ),
389 get_result_flags( inst ), 0,
390 swizzle(src0, ONE, Y, Z, ONE),
391 swizzle(src1, ONE, Y, ONE, W ),
392 0);
393 break;
394
395 case OPCODE_EX2:
396 src0 = src_vector( p, &inst->SrcReg[0], program);
397
398 i915_emit_arith( p,
399 A0_EXP,
400 get_result_vector( p, inst ),
401 get_result_flags( inst ), 0,
402 swizzle(src0,X,X,X,X), 0, 0);
403 break;
404
405 case OPCODE_FLR:
406 EMIT_1ARG_ARITH( A0_FLR );
407 break;
408
409 case OPCODE_FRC:
410 EMIT_1ARG_ARITH( A0_FRC );
411 break;
412
413 case OPCODE_KIL:
414 src0 = src_vector( p, &inst->SrcReg[0], program);
415 tmp = i915_get_utemp( p );
416
417 i915_emit_texld( p,
418 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
419 0,
420 src0,
421 T0_TEXKILL );
422 break;
423
424 case OPCODE_LG2:
425 src0 = src_vector( p, &inst->SrcReg[0], program);
426
427 i915_emit_arith( p,
428 A0_LOG,
429 get_result_vector( p, inst ),
430 get_result_flags( inst ), 0,
431 swizzle(src0,X,X,X,X), 0, 0);
432 break;
433
434 case OPCODE_LIT:
435 src0 = src_vector( p, &inst->SrcReg[0], program);
436 tmp = i915_get_utemp( p );
437
438 /* tmp = max( a.xyzw, a.00zw )
439 * XXX: Clamp tmp.w to -128..128
440 * tmp.y = log(tmp.y)
441 * tmp.y = tmp.w * tmp.y
442 * tmp.y = exp(tmp.y)
443 * result = cmp (a.11-x1, a.1x01, a.1xy1 )
444 */
445 i915_emit_arith( p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
446 src0, swizzle(src0, ZERO, ZERO, Z, W), 0 );
447
448 i915_emit_arith( p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
449 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
450
451 i915_emit_arith( p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
452 swizzle(tmp, ZERO, Y, ZERO, ZERO),
453 swizzle(tmp, ZERO, W, ZERO, ZERO), 0 );
454
455 i915_emit_arith( p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
456 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
457
458 i915_emit_arith( p, A0_CMP,
459 get_result_vector( p, inst ),
460 get_result_flags( inst ), 0,
461 negate(swizzle(tmp, ONE, ONE, X, ONE),0,0,1,0),
462 swizzle(tmp, ONE, X, ZERO, ONE),
463 swizzle(tmp, ONE, X, Y, ONE));
464
465 break;
466
467 case OPCODE_LRP:
468 src0 = src_vector( p, &inst->SrcReg[0], program);
469 src1 = src_vector( p, &inst->SrcReg[1], program);
470 src2 = src_vector( p, &inst->SrcReg[2], program);
471 flags = get_result_flags( inst );
472 tmp = i915_get_utemp( p );
473
474 /* b*a + c*(1-a)
475 *
476 * b*a + c - ca
477 *
478 * tmp = b*a + c,
479 * result = (-c)*a + tmp
480 */
481 i915_emit_arith( p, A0_MAD, tmp,
482 flags & A0_DEST_CHANNEL_ALL, 0,
483 src1, src0, src2 );
484
485 i915_emit_arith( p, A0_MAD,
486 get_result_vector( p, inst ),
487 flags, 0,
488 negate(src2, 1,1,1,1), src0, tmp );
489 break;
490
491 case OPCODE_MAD:
492 EMIT_3ARG_ARITH( A0_MAD );
493 break;
494
495 case OPCODE_MAX:
496 EMIT_2ARG_ARITH( A0_MAX );
497 break;
498
499 case OPCODE_MIN:
500 src0 = src_vector( p, &inst->SrcReg[0], program);
501 src1 = src_vector( p, &inst->SrcReg[1], program);
502 tmp = i915_get_utemp( p );
503 flags = get_result_flags( inst );
504
505 i915_emit_arith( p,
506 A0_MAX,
507 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
508 negate(src0,1,1,1,1),
509 negate(src1,1,1,1,1), 0);
510
511 i915_emit_arith( p,
512 A0_MOV,
513 get_result_vector( p, inst ),
514 flags, 0,
515 negate(tmp, 1,1,1,1), 0, 0);
516 break;
517
518 case OPCODE_MOV:
519 EMIT_1ARG_ARITH( A0_MOV );
520 break;
521
522 case OPCODE_MUL:
523 EMIT_2ARG_ARITH( A0_MUL );
524 break;
525
526 case OPCODE_POW:
527 src0 = src_vector( p, &inst->SrcReg[0], program);
528 src1 = src_vector( p, &inst->SrcReg[1], program);
529 tmp = i915_get_utemp( p );
530 flags = get_result_flags( inst );
531
532 /* XXX: masking on intermediate values, here and elsewhere.
533 */
534 i915_emit_arith( p,
535 A0_LOG,
536 tmp, A0_DEST_CHANNEL_X, 0,
537 swizzle(src0,X,X,X,X), 0, 0);
538
539 i915_emit_arith( p,
540 A0_MUL,
541 tmp, A0_DEST_CHANNEL_X, 0,
542 tmp, src1, 0);
543
544
545 i915_emit_arith( p,
546 A0_EXP,
547 get_result_vector( p, inst ),
548 flags, 0,
549 swizzle(tmp,X,X,X,X), 0, 0);
550
551 break;
552
553 case OPCODE_RCP:
554 src0 = src_vector( p, &inst->SrcReg[0], program);
555
556 i915_emit_arith( p,
557 A0_RCP,
558 get_result_vector( p, inst ),
559 get_result_flags( inst ), 0,
560 swizzle(src0,X,X,X,X), 0, 0);
561 break;
562
563 case OPCODE_RSQ:
564
565 src0 = src_vector( p, &inst->SrcReg[0], program);
566
567 i915_emit_arith( p,
568 A0_RSQ,
569 get_result_vector( p, inst ),
570 get_result_flags( inst ), 0,
571 swizzle(src0,X,X,X,X), 0, 0);
572 break;
573
574 case OPCODE_SCS:
575 src0 = src_vector( p, &inst->SrcReg[0], program);
576 tmp = i915_get_utemp( p );
577
578 /*
579 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
580 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
581 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
582 * scs.x = DP4 t1, sin_constants
583 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
584 * scs.y = DP4 t1, cos_constants
585 */
586 i915_emit_arith( p,
587 A0_MUL,
588 tmp, A0_DEST_CHANNEL_XY, 0,
589 swizzle(src0, X,X,ONE,ONE),
590 swizzle(src0, X,ONE,ONE,ONE), 0);
591
592 i915_emit_arith( p,
593 A0_MUL,
594 tmp, A0_DEST_CHANNEL_ALL, 0,
595 swizzle(tmp, X,Y,X,Y),
596 swizzle(tmp, X,X,ONE,ONE), 0);
597
598 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
599 GLuint tmp1;
600
601 if (inst->DstReg.WriteMask & WRITEMASK_X)
602 tmp1 = i915_get_utemp( p );
603 else
604 tmp1 = tmp;
605
606 i915_emit_arith( p,
607 A0_MUL,
608 tmp1, A0_DEST_CHANNEL_ALL, 0,
609 swizzle(tmp, X,Y,Y,W),
610 swizzle(tmp, X,Z,ONE,ONE), 0);
611
612 i915_emit_arith( p,
613 A0_DP4,
614 get_result_vector( p, inst ),
615 A0_DEST_CHANNEL_Y, 0,
616 swizzle(tmp1, W,Z,Y,X),
617 i915_emit_const4fv( p, sin_constants ), 0);
618 }
619
620 if (inst->DstReg.WriteMask & WRITEMASK_X) {
621 i915_emit_arith( p,
622 A0_MUL,
623 tmp, A0_DEST_CHANNEL_XYZ, 0,
624 swizzle(tmp, X,X,Z,ONE),
625 swizzle(tmp, Z,ONE,ONE,ONE), 0);
626
627 i915_emit_arith( p,
628 A0_DP4,
629 get_result_vector( p, inst ),
630 A0_DEST_CHANNEL_X, 0,
631 swizzle(tmp, ONE,Z,Y,X),
632 i915_emit_const4fv( p, cos_constants ), 0);
633 }
634 break;
635
636 case OPCODE_SGE:
637 EMIT_2ARG_ARITH( A0_SGE );
638 break;
639
640 case OPCODE_SIN:
641 src0 = src_vector( p, &inst->SrcReg[0], program);
642 tmp = i915_get_utemp( p );
643
644 i915_emit_arith( p,
645 A0_MUL,
646 tmp, A0_DEST_CHANNEL_X, 0,
647 src0,
648 i915_emit_const1f(p, 1.0/(M_PI * 2)),
649 0);
650
651 i915_emit_arith( p,
652 A0_MOD,
653 tmp, A0_DEST_CHANNEL_X, 0,
654 tmp,
655 0, 0 );
656
657 /* By choosing different taylor constants, could get rid of this mul:
658 */
659 i915_emit_arith( p,
660 A0_MUL,
661 tmp, A0_DEST_CHANNEL_X, 0,
662 tmp,
663 i915_emit_const1f(p, (M_PI * 2)),
664 0);
665
666 /*
667 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
668 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
669 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
670 * result = DP4 t1.wzyx, sin_constants
671 */
672 i915_emit_arith( p,
673 A0_MUL,
674 tmp, A0_DEST_CHANNEL_XY, 0,
675 swizzle(tmp, X,X,ONE,ONE),
676 swizzle(tmp, X,ONE,ONE,ONE), 0);
677
678 i915_emit_arith( p,
679 A0_MUL,
680 tmp, A0_DEST_CHANNEL_ALL, 0,
681 swizzle(tmp, X,Y,X,Y),
682 swizzle(tmp, X,X,ONE,ONE), 0);
683
684 i915_emit_arith( p,
685 A0_MUL,
686 tmp, A0_DEST_CHANNEL_ALL, 0,
687 swizzle(tmp, X,Y,Y,W),
688 swizzle(tmp, X,Z,ONE,ONE), 0);
689
690 i915_emit_arith( p,
691 A0_DP4,
692 get_result_vector( p, inst ),
693 get_result_flags( inst ), 0,
694 swizzle(tmp, W, Z, Y, X ),
695 i915_emit_const4fv( p, sin_constants ), 0);
696 break;
697
698 case OPCODE_SLT:
699 EMIT_2ARG_ARITH( A0_SLT );
700 break;
701
702 case OPCODE_SUB:
703 src0 = src_vector( p, &inst->SrcReg[0], program);
704 src1 = src_vector( p, &inst->SrcReg[1], program);
705
706 i915_emit_arith( p,
707 A0_ADD,
708 get_result_vector( p, inst ),
709 get_result_flags( inst ), 0,
710 src0, negate(src1, 1,1,1,1), 0);
711 break;
712
713 case OPCODE_SWZ:
714 EMIT_1ARG_ARITH( A0_MOV ); /* extended swizzle handled natively */
715 break;
716
717 case OPCODE_TEX:
718 EMIT_TEX( T0_TEXLD );
719 break;
720
721 case OPCODE_TXB:
722 EMIT_TEX( T0_TEXLDB );
723 break;
724
725 case OPCODE_TXP:
726 EMIT_TEX( T0_TEXLDP );
727 break;
728
729 case OPCODE_XPD:
730 /* Cross product:
731 * result.x = src0.y * src1.z - src0.z * src1.y;
732 * result.y = src0.z * src1.x - src0.x * src1.z;
733 * result.z = src0.x * src1.y - src0.y * src1.x;
734 * result.w = undef;
735 */
736 src0 = src_vector( p, &inst->SrcReg[0], program);
737 src1 = src_vector( p, &inst->SrcReg[1], program);
738 tmp = i915_get_utemp( p );
739
740 i915_emit_arith( p,
741 A0_MUL,
742 tmp, A0_DEST_CHANNEL_ALL, 0,
743 swizzle(src0,Z,X,Y,ONE),
744 swizzle(src1,Y,Z,X,ONE), 0);
745
746 i915_emit_arith( p,
747 A0_MAD,
748 get_result_vector( p, inst ),
749 get_result_flags( inst ), 0,
750 swizzle(src0,Y,Z,X,ONE),
751 swizzle(src1,Z,X,Y,ONE),
752 negate(tmp,1,1,1,0));
753 break;
754
755 case OPCODE_END:
756 return;
757
758 default:
759 i915_program_error( p, "bad opcode" );
760 return;
761 }
762
763 inst++;
764 i915_release_utemps( p );
765 }
766 }
767
768 /* Rather than trying to intercept and jiggle depth writes during
769 * emit, just move the value into its correct position at the end of
770 * the program:
771 */
772 static void fixup_depth_write( struct i915_fragment_program *p )
773 {
774 if (p->depth_written) {
775 GLuint depth = UREG(REG_TYPE_OD, 0);
776
777 i915_emit_arith( p,
778 A0_MOV,
779 depth, A0_DEST_CHANNEL_W, 0,
780 swizzle(depth,X,Y,Z,Z),
781 0, 0);
782 }
783 }
784
785
786 static void check_wpos( struct i915_fragment_program *p )
787 {
788 GLuint inputs = p->FragProg.Base.InputsRead;
789 GLint i;
790
791 p->wpos_tex = -1;
792
793 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
794 if (inputs & FRAG_BIT_TEX(i))
795 continue;
796 else if (inputs & FRAG_BIT_WPOS) {
797 p->wpos_tex = i;
798 inputs &= ~FRAG_BIT_WPOS;
799 }
800 }
801
802 if (inputs & FRAG_BIT_WPOS) {
803 i915_program_error(p, "No free texcoord for wpos value");
804 }
805 }
806
807
808 static void translate_program( struct i915_fragment_program *p )
809 {
810 i915ContextPtr i915 = I915_CONTEXT(p->ctx);
811
812 i915_init_program( i915, p );
813 check_wpos( p );
814 upload_program( p );
815 fixup_depth_write( p );
816 i915_fini_program( p );
817
818 p->translated = 1;
819 }
820
821
822 static void track_params( struct i915_fragment_program *p )
823 {
824 GLint i;
825
826 if (p->nr_params)
827 _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
828
829 for (i = 0; i < p->nr_params; i++) {
830 GLint reg = p->param[i].reg;
831 COPY_4V( p->constant[reg], p->param[i].values );
832 }
833
834 p->params_uptodate = 1;
835 p->on_hardware = 0; /* overkill */
836 }
837
838
839 static void i915BindProgram( GLcontext *ctx,
840 GLenum target,
841 struct gl_program *prog )
842 {
843 if (target == GL_FRAGMENT_PROGRAM_ARB) {
844 i915ContextPtr i915 = I915_CONTEXT(ctx);
845 struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
846
847 if (i915->current_program == p)
848 return;
849
850 if (i915->current_program) {
851 i915->current_program->on_hardware = 0;
852 i915->current_program->params_uptodate = 0;
853 }
854
855 i915->current_program = p;
856
857 assert(p->on_hardware == 0);
858 assert(p->params_uptodate == 0);
859
860 /* Hack: make sure fog is correctly enabled according to this
861 * fragment program's fog options.
862 */
863 ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB,
864 ctx->FragmentProgram.Enabled );
865 }
866 }
867
868 static struct gl_program *i915NewProgram( GLcontext *ctx,
869 GLenum target,
870 GLuint id )
871 {
872 switch (target) {
873 case GL_VERTEX_PROGRAM_ARB:
874 return _mesa_init_vertex_program( ctx, CALLOC_STRUCT(gl_vertex_program),
875 target, id );
876
877 case GL_FRAGMENT_PROGRAM_ARB: {
878 struct i915_fragment_program *prog = CALLOC_STRUCT(i915_fragment_program);
879 if (prog) {
880 i915_init_program( I915_CONTEXT(ctx), prog );
881
882 return _mesa_init_fragment_program( ctx, &prog->FragProg,
883 target, id );
884 }
885 else
886 return NULL;
887 }
888
889 default:
890 /* Just fallback:
891 */
892 return _mesa_new_program( ctx, target, id );
893 }
894 }
895
896 static void i915DeleteProgram( GLcontext *ctx,
897 struct gl_program *prog )
898 {
899 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
900 i915ContextPtr i915 = I915_CONTEXT(ctx);
901 struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
902
903 if (i915->current_program == p)
904 i915->current_program = 0;
905 }
906
907 _mesa_delete_program( ctx, prog );
908 }
909
910
911 static GLboolean i915IsProgramNative( GLcontext *ctx,
912 GLenum target,
913 struct gl_program *prog )
914 {
915 if (target == GL_FRAGMENT_PROGRAM_ARB) {
916 struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
917
918 if (!p->translated)
919 translate_program( p );
920
921 return !p->error;
922 }
923 else
924 return GL_TRUE;
925 }
926
927 static void i915ProgramStringNotify( GLcontext *ctx,
928 GLenum target,
929 struct gl_program *prog )
930 {
931 if (target == GL_FRAGMENT_PROGRAM_ARB) {
932 struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
933 p->translated = 0;
934
935 /* Hack: make sure fog is correctly enabled according to this
936 * fragment program's fog options.
937 */
938 ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB,
939 ctx->FragmentProgram.Enabled );
940
941 if (p->FragProg.FogOption) {
942 /* add extra instructions to do fog, then turn off FogOption field */
943 _mesa_append_fog_code(ctx, &p->FragProg);
944 p->FragProg.FogOption = GL_NONE;
945 }
946 }
947
948 _tnl_program_string(ctx, target, prog);
949 }
950
951
952 void i915ValidateFragmentProgram( i915ContextPtr i915 )
953 {
954 GLcontext *ctx = &i915->intel.ctx;
955 intelContextPtr intel = INTEL_CONTEXT(ctx);
956 TNLcontext *tnl = TNL_CONTEXT(ctx);
957 struct vertex_buffer *VB = &tnl->vb;
958
959 struct i915_fragment_program *p =
960 (struct i915_fragment_program *)ctx->FragmentProgram._Current;
961
962 const GLuint inputsRead = p->FragProg.Base.InputsRead;
963 GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
964 GLuint s2 = S2_TEXCOORD_NONE;
965 int i, offset = 0;
966
967 if (i915->current_program != p)
968 {
969 if (i915->current_program) {
970 i915->current_program->on_hardware = 0;
971 i915->current_program->params_uptodate = 0;
972 }
973
974 i915->current_program = p;
975 }
976
977
978 /* Important:
979 */
980 VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
981
982 if (!p->translated)
983 translate_program( p );
984
985 intel->vertex_attr_count = 0;
986 intel->wpos_offset = 0;
987 intel->wpos_size = 0;
988 intel->coloroffset = 0;
989 intel->specoffset = 0;
990
991 if (inputsRead & FRAG_BITS_TEX_ANY) {
992 EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
993 }
994 else {
995 EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 );
996 }
997
998 if (inputsRead & FRAG_BIT_COL0) {
999 intel->coloroffset = offset / 4;
1000 EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4 );
1001 }
1002
1003 if ((inputsRead & (FRAG_BIT_COL1|FRAG_BIT_FOGC)) ||
1004 i915->vertex_fog != I915_FOG_NONE) {
1005
1006 if (inputsRead & FRAG_BIT_COL1) {
1007 intel->specoffset = offset / 4;
1008 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3 );
1009 }
1010 else
1011 EMIT_PAD(3);
1012
1013 if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE)
1014 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1 );
1015 else
1016 EMIT_PAD( 1 );
1017 }
1018
1019 /* XXX this was disabled, but enabling this code helped fix the Glean
1020 * tfragprog1 fog tests.
1021 */
1022 #if 1
1023 if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
1024 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4 );
1025 }
1026 #endif
1027
1028 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1029 if (inputsRead & FRAG_BIT_TEX(i)) {
1030 int sz = VB->TexCoordPtr[i]->size;
1031
1032 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1033 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
1034
1035 EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 );
1036 }
1037 else if (i == p->wpos_tex) {
1038
1039 /* If WPOS is required, duplicate the XYZ position data in an
1040 * unused texture coordinate:
1041 */
1042 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1043 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
1044
1045 intel->wpos_offset = offset;
1046 intel->wpos_size = 3 * sizeof(GLuint);
1047
1048 EMIT_PAD( intel->wpos_size );
1049 }
1050 }
1051
1052 if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
1053 s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
1054
1055 I915_STATECHANGE( i915, I915_UPLOAD_CTX );
1056
1057 /* Must do this *after* statechange, so as not to affect
1058 * buffered vertices reliant on the old state:
1059 */
1060 intel->vertex_size = _tnl_install_attrs( &intel->ctx,
1061 intel->vertex_attrs,
1062 intel->vertex_attr_count,
1063 intel->ViewportMatrix.m, 0 );
1064
1065 intel->vertex_size >>= 2;
1066
1067 i915->state.Ctx[I915_CTXREG_LIS2] = s2;
1068 i915->state.Ctx[I915_CTXREG_LIS4] = s4;
1069
1070 assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size ));
1071 }
1072
1073 if (!p->params_uptodate)
1074 track_params( p );
1075
1076 if (!p->on_hardware)
1077 i915_upload_program( i915, p );
1078 }
1079
1080 void i915InitFragProgFuncs( struct dd_function_table *functions )
1081 {
1082 functions->BindProgram = i915BindProgram;
1083 functions->NewProgram = i915NewProgram;
1084 functions->DeleteProgram = i915DeleteProgram;
1085 functions->IsProgramNative = i915IsProgramNative;
1086 functions->ProgramStringNotify = i915ProgramStringNotify;
1087 }