merge current trunk into vbo branch
[mesa.git] / src / mesa / drivers / dri / i915 / i915_fragprog.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "glheader.h"
29 #include "macros.h"
30 #include "enums.h"
31
32 #include "tnl/tnl.h"
33 #include "tnl/t_context.h"
34 #include "intel_batchbuffer.h"
35
36 #include "i915_reg.h"
37 #include "i915_context.h"
38 #include "i915_program.h"
39
40 #include "program_instruction.h"
41 #include "program.h"
42
43
44
45 /* 1, -1/3!, 1/5!, -1/7! */
46 static const GLfloat sin_constants[4] = { 1.0,
47 -1.0/(3*2*1),
48 1.0/(5*4*3*2*1),
49 -1.0/(7*6*5*4*3*2*1) };
50
51 /* 1, -1/2!, 1/4!, -1/6! */
52 static const GLfloat cos_constants[4] = { 1.0,
53 -1.0/(2*1),
54 1.0/(4*3*2*1),
55 -1.0/(6*5*4*3*2*1) };
56
57 /**
58 * Retrieve a ureg for the given source register. Will emit
59 * constants, apply swizzling and negation as needed.
60 */
61 static GLuint src_vector( struct i915_fragment_program *p,
62 const struct prog_src_register *source,
63 const struct gl_fragment_program *program )
64 {
65 GLuint src;
66
67 switch (source->File) {
68
69 /* Registers:
70 */
71 case PROGRAM_TEMPORARY:
72 if (source->Index >= I915_MAX_TEMPORARY) {
73 i915_program_error( p, "Exceeded max temporary reg" );
74 return 0;
75 }
76 src = UREG( REG_TYPE_R, source->Index );
77 break;
78 case PROGRAM_INPUT:
79 switch (source->Index) {
80 case FRAG_ATTRIB_WPOS:
81 src = i915_emit_decl( p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL );
82 break;
83 case FRAG_ATTRIB_COL0:
84 src = i915_emit_decl( p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL );
85 break;
86 case FRAG_ATTRIB_COL1:
87 src = i915_emit_decl( p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ );
88 src = swizzle( src, X, Y, Z, ONE );
89 break;
90 case FRAG_ATTRIB_FOGC:
91 src = i915_emit_decl( p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W );
92 src = swizzle( src, W, W, W, W );
93 break;
94 case FRAG_ATTRIB_TEX0:
95 case FRAG_ATTRIB_TEX1:
96 case FRAG_ATTRIB_TEX2:
97 case FRAG_ATTRIB_TEX3:
98 case FRAG_ATTRIB_TEX4:
99 case FRAG_ATTRIB_TEX5:
100 case FRAG_ATTRIB_TEX6:
101 case FRAG_ATTRIB_TEX7:
102 src = i915_emit_decl( p, REG_TYPE_T,
103 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
104 D0_CHANNEL_ALL );
105 break;
106
107 default:
108 i915_program_error( p, "Bad source->Index" );
109 return 0;
110 }
111 break;
112
113 /* Various paramters and env values. All emitted to
114 * hardware as program constants.
115 */
116 case PROGRAM_LOCAL_PARAM:
117 src = i915_emit_param4fv(
118 p, program->Base.LocalParams[source->Index]);
119 break;
120
121 case PROGRAM_ENV_PARAM:
122 src = i915_emit_param4fv(
123 p, p->ctx->FragmentProgram.Parameters[source->Index]);
124 break;
125
126 case PROGRAM_STATE_VAR:
127 case PROGRAM_NAMED_PARAM:
128 src = i915_emit_param4fv(
129 p, program->Base.Parameters->ParameterValues[source->Index] );
130 break;
131
132 default:
133 i915_program_error( p, "Bad source->File" );
134 return 0;
135 }
136
137 src = swizzle(src,
138 GET_SWZ(source->Swizzle, 0),
139 GET_SWZ(source->Swizzle, 1),
140 GET_SWZ(source->Swizzle, 2),
141 GET_SWZ(source->Swizzle, 3));
142
143 if (source->NegateBase)
144 src = negate( src,
145 GET_BIT(source->NegateBase, 0),
146 GET_BIT(source->NegateBase, 1),
147 GET_BIT(source->NegateBase, 2),
148 GET_BIT(source->NegateBase, 3));
149
150 return src;
151 }
152
153
154 static GLuint get_result_vector( struct i915_fragment_program *p,
155 const struct prog_instruction *inst )
156 {
157 switch (inst->DstReg.File) {
158 case PROGRAM_OUTPUT:
159 switch (inst->DstReg.Index) {
160 case FRAG_RESULT_COLR:
161 return UREG(REG_TYPE_OC, 0);
162 case FRAG_RESULT_DEPR:
163 p->depth_written = 1;
164 return UREG(REG_TYPE_OD, 0);
165 default:
166 i915_program_error( p, "Bad inst->DstReg.Index" );
167 return 0;
168 }
169 case PROGRAM_TEMPORARY:
170 return UREG(REG_TYPE_R, inst->DstReg.Index);
171 default:
172 i915_program_error( p, "Bad inst->DstReg.File" );
173 return 0;
174 }
175 }
176
177 static GLuint get_result_flags( const struct prog_instruction *inst )
178 {
179 GLuint flags = 0;
180
181 if (inst->SaturateMode == SATURATE_ZERO_ONE) flags |= A0_DEST_SATURATE;
182 if (inst->DstReg.WriteMask & WRITEMASK_X) flags |= A0_DEST_CHANNEL_X;
183 if (inst->DstReg.WriteMask & WRITEMASK_Y) flags |= A0_DEST_CHANNEL_Y;
184 if (inst->DstReg.WriteMask & WRITEMASK_Z) flags |= A0_DEST_CHANNEL_Z;
185 if (inst->DstReg.WriteMask & WRITEMASK_W) flags |= A0_DEST_CHANNEL_W;
186
187 return flags;
188 }
189
190 static GLuint translate_tex_src_target( struct i915_fragment_program *p,
191 GLubyte bit )
192 {
193 switch (bit) {
194 case TEXTURE_1D_INDEX: return D0_SAMPLE_TYPE_2D;
195 case TEXTURE_2D_INDEX: return D0_SAMPLE_TYPE_2D;
196 case TEXTURE_RECT_INDEX: return D0_SAMPLE_TYPE_2D;
197 case TEXTURE_3D_INDEX: return D0_SAMPLE_TYPE_VOLUME;
198 case TEXTURE_CUBE_INDEX: return D0_SAMPLE_TYPE_CUBE;
199 default: i915_program_error(p, "TexSrcBit"); return 0;
200 }
201 }
202
/* Emit the texture instruction OP for the current instruction.
 *
 * Declares the sampler for inst->TexSrcUnit, fetches the coordinate
 * from SrcReg[0] and issues the texel lookup into the instruction's
 * destination.  Expects 'p', 'inst' and 'program' to be in scope at
 * the point of expansion.
 */
#define EMIT_TEX( OP )                                                  \
do {                                                                    \
   const GLuint tex_dim =                                               \
      translate_tex_src_target( p, inst->TexSrcTarget );                \
   const GLuint tex_sampler =                                           \
      i915_emit_decl( p, REG_TYPE_S, inst->TexSrcUnit, tex_dim );       \
   const GLuint tex_coord =                                             \
      src_vector( p, &inst->SrcReg[0], program );                       \
                                                                        \
   /* Texel lookup */                                                   \
   i915_emit_texld( p,                                                  \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ),                           \
                    tex_sampler,                                        \
                    tex_coord,                                          \
                    OP );                                               \
} while (0)
218
/* Emit arithmetic instruction OP whose first N source operands come
 * straight from the current instruction; unused operand slots are
 * passed as 0.  Expects 'p', 'inst' and 'program' to be in scope at
 * the point of expansion.
 */
#define EMIT_ARITH( OP, N )                                             \
do {                                                                    \
   i915_emit_arith( p,                                                  \
                    OP,                                                 \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ), 0,                        \
                    ((N) >= 1) ? src_vector( p, &inst->SrcReg[0], program ) : 0, \
                    ((N) >= 2) ? src_vector( p, &inst->SrcReg[1], program ) : 0, \
                    ((N) >= 3) ? src_vector( p, &inst->SrcReg[2], program ) : 0 ); \
} while (0)

/* Fixed-arity shorthands for EMIT_ARITH. */
#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
233
234
235 /* Possible concerns:
236 *
237 * SIN, COS -- could use another taylor step?
238 * LIT -- results seem a little different to sw mesa
239 * LOG -- different to mesa on negative numbers, but this is conformant.
240 *
241 * Parse failures -- Mesa doesn't currently give a good indication
242 * internally whether a particular program string parsed or not. This
243 * can lead to confusion -- hopefully we cope with it ok now.
244 *
245 */
246 static void upload_program( struct i915_fragment_program *p )
247 {
248 const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
249 const struct prog_instruction *inst = program->Base.Instructions;
250
251 /* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */
252
253 /* Is this a parse-failed program? Ensure a valid program is
254 * loaded, as the flagging of an error isn't sufficient to stop
255 * this being uploaded to hardware.
256 */
257 if (inst[0].Opcode == OPCODE_END) {
258 GLuint tmp = i915_get_utemp( p );
259 i915_emit_arith( p,
260 A0_MOV,
261 UREG(REG_TYPE_OC, 0),
262 A0_DEST_CHANNEL_ALL, 0,
263 swizzle(tmp,ONE,ZERO,ONE,ONE), 0, 0);
264 return;
265 }
266
267 while (1) {
268 GLuint src0, src1, src2, flags;
269 GLuint tmp = 0;
270
271 switch (inst->Opcode) {
272 case OPCODE_ABS:
273 src0 = src_vector( p, &inst->SrcReg[0], program);
274 i915_emit_arith( p,
275 A0_MAX,
276 get_result_vector( p, inst ),
277 get_result_flags( inst ), 0,
278 src0, negate(src0, 1,1,1,1), 0);
279 break;
280
281 case OPCODE_ADD:
282 EMIT_2ARG_ARITH( A0_ADD );
283 break;
284
285 case OPCODE_CMP:
286 src0 = src_vector( p, &inst->SrcReg[0], program);
287 src1 = src_vector( p, &inst->SrcReg[1], program);
288 src2 = src_vector( p, &inst->SrcReg[2], program);
289 i915_emit_arith( p,
290 A0_CMP,
291 get_result_vector( p, inst ),
292 get_result_flags( inst ), 0,
293 src0, src2, src1); /* NOTE: order of src2, src1 */
294 break;
295
296 case OPCODE_COS:
297 src0 = src_vector( p, &inst->SrcReg[0], program);
298 tmp = i915_get_utemp( p );
299
300 i915_emit_arith( p,
301 A0_MUL,
302 tmp, A0_DEST_CHANNEL_X, 0,
303 src0,
304 i915_emit_const1f(p, 1.0/(M_PI * 2)),
305 0);
306
307 i915_emit_arith( p,
308 A0_MOD,
309 tmp, A0_DEST_CHANNEL_X, 0,
310 tmp,
311 0, 0 );
312
313 /* By choosing different taylor constants, could get rid of this mul:
314 */
315 i915_emit_arith( p,
316 A0_MUL,
317 tmp, A0_DEST_CHANNEL_X, 0,
318 tmp,
319 i915_emit_const1f(p, (M_PI * 2)),
320 0);
321
322 /*
323 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
324 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
325 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
326 * result = DP4 t0, cos_constants
327 */
328 i915_emit_arith( p,
329 A0_MUL,
330 tmp, A0_DEST_CHANNEL_XY, 0,
331 swizzle(tmp, X,X,ONE,ONE),
332 swizzle(tmp, X,ONE,ONE,ONE), 0);
333
334 i915_emit_arith( p,
335 A0_MUL,
336 tmp, A0_DEST_CHANNEL_XYZ, 0,
337 swizzle(tmp, X,Y,X,ONE),
338 swizzle(tmp, X,X,ONE,ONE), 0);
339
340 i915_emit_arith( p,
341 A0_MUL,
342 tmp, A0_DEST_CHANNEL_XYZ, 0,
343 swizzle(tmp, X,X,Z,ONE),
344 swizzle(tmp, Z,ONE,ONE,ONE), 0);
345
346 i915_emit_arith( p,
347 A0_DP4,
348 get_result_vector( p, inst ),
349 get_result_flags( inst ), 0,
350 swizzle(tmp, ONE,Z,Y,X),
351 i915_emit_const4fv( p, cos_constants ), 0);
352
353 break;
354
355 case OPCODE_DP3:
356 EMIT_2ARG_ARITH( A0_DP3 );
357 break;
358
359 case OPCODE_DP4:
360 EMIT_2ARG_ARITH( A0_DP4 );
361 break;
362
363 case OPCODE_DPH:
364 src0 = src_vector( p, &inst->SrcReg[0], program);
365 src1 = src_vector( p, &inst->SrcReg[1], program);
366
367 i915_emit_arith( p,
368 A0_DP4,
369 get_result_vector( p, inst ),
370 get_result_flags( inst ), 0,
371 swizzle(src0, X,Y,Z,ONE), src1, 0);
372 break;
373
374 case OPCODE_DST:
375 src0 = src_vector( p, &inst->SrcReg[0], program);
376 src1 = src_vector( p, &inst->SrcReg[1], program);
377
378 /* result[0] = 1 * 1;
379 * result[1] = a[1] * b[1];
380 * result[2] = a[2] * 1;
381 * result[3] = 1 * b[3];
382 */
383 i915_emit_arith( p,
384 A0_MUL,
385 get_result_vector( p, inst ),
386 get_result_flags( inst ), 0,
387 swizzle(src0, ONE, Y, Z, ONE),
388 swizzle(src1, ONE, Y, ONE, W ),
389 0);
390 break;
391
392 case OPCODE_EX2:
393 src0 = src_vector( p, &inst->SrcReg[0], program);
394
395 i915_emit_arith( p,
396 A0_EXP,
397 get_result_vector( p, inst ),
398 get_result_flags( inst ), 0,
399 swizzle(src0,X,X,X,X), 0, 0);
400 break;
401
402 case OPCODE_FLR:
403 EMIT_1ARG_ARITH( A0_FLR );
404 break;
405
406 case OPCODE_FRC:
407 EMIT_1ARG_ARITH( A0_FRC );
408 break;
409
410 case OPCODE_KIL:
411 src0 = src_vector( p, &inst->SrcReg[0], program);
412 tmp = i915_get_utemp( p );
413
414 i915_emit_texld( p,
415 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
416 0,
417 src0,
418 T0_TEXKILL );
419 break;
420
421 case OPCODE_LG2:
422 src0 = src_vector( p, &inst->SrcReg[0], program);
423
424 i915_emit_arith( p,
425 A0_LOG,
426 get_result_vector( p, inst ),
427 get_result_flags( inst ), 0,
428 swizzle(src0,X,X,X,X), 0, 0);
429 break;
430
431 case OPCODE_LIT:
432 src0 = src_vector( p, &inst->SrcReg[0], program);
433 tmp = i915_get_utemp( p );
434
435 /* tmp = max( a.xyzw, a.00zw )
436 * XXX: Clamp tmp.w to -128..128
437 * tmp.y = log(tmp.y)
438 * tmp.y = tmp.w * tmp.y
439 * tmp.y = exp(tmp.y)
440 * result = cmp (a.11-x1, a.1x01, a.1xy1 )
441 */
442 i915_emit_arith( p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
443 src0, swizzle(src0, ZERO, ZERO, Z, W), 0 );
444
445 i915_emit_arith( p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
446 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
447
448 i915_emit_arith( p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
449 swizzle(tmp, ZERO, Y, ZERO, ZERO),
450 swizzle(tmp, ZERO, W, ZERO, ZERO), 0 );
451
452 i915_emit_arith( p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
453 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
454
455 i915_emit_arith( p, A0_CMP,
456 get_result_vector( p, inst ),
457 get_result_flags( inst ), 0,
458 negate(swizzle(tmp, ONE, ONE, X, ONE),0,0,1,0),
459 swizzle(tmp, ONE, X, ZERO, ONE),
460 swizzle(tmp, ONE, X, Y, ONE));
461
462 break;
463
464 case OPCODE_LRP:
465 src0 = src_vector( p, &inst->SrcReg[0], program);
466 src1 = src_vector( p, &inst->SrcReg[1], program);
467 src2 = src_vector( p, &inst->SrcReg[2], program);
468 flags = get_result_flags( inst );
469 tmp = i915_get_utemp( p );
470
471 /* b*a + c*(1-a)
472 *
473 * b*a + c - ca
474 *
475 * tmp = b*a + c,
476 * result = (-c)*a + tmp
477 */
478 i915_emit_arith( p, A0_MAD, tmp,
479 flags & A0_DEST_CHANNEL_ALL, 0,
480 src1, src0, src2 );
481
482 i915_emit_arith( p, A0_MAD,
483 get_result_vector( p, inst ),
484 flags, 0,
485 negate(src2, 1,1,1,1), src0, tmp );
486 break;
487
488 case OPCODE_MAD:
489 EMIT_3ARG_ARITH( A0_MAD );
490 break;
491
492 case OPCODE_MAX:
493 EMIT_2ARG_ARITH( A0_MAX );
494 break;
495
496 case OPCODE_MIN:
497 src0 = src_vector( p, &inst->SrcReg[0], program);
498 src1 = src_vector( p, &inst->SrcReg[1], program);
499 tmp = i915_get_utemp( p );
500 flags = get_result_flags( inst );
501
502 i915_emit_arith( p,
503 A0_MAX,
504 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
505 negate(src0,1,1,1,1),
506 negate(src1,1,1,1,1), 0);
507
508 i915_emit_arith( p,
509 A0_MOV,
510 get_result_vector( p, inst ),
511 flags, 0,
512 negate(tmp, 1,1,1,1), 0, 0);
513 break;
514
515 case OPCODE_MOV:
516 EMIT_1ARG_ARITH( A0_MOV );
517 break;
518
519 case OPCODE_MUL:
520 EMIT_2ARG_ARITH( A0_MUL );
521 break;
522
523 case OPCODE_POW:
524 src0 = src_vector( p, &inst->SrcReg[0], program);
525 src1 = src_vector( p, &inst->SrcReg[1], program);
526 tmp = i915_get_utemp( p );
527 flags = get_result_flags( inst );
528
529 /* XXX: masking on intermediate values, here and elsewhere.
530 */
531 i915_emit_arith( p,
532 A0_LOG,
533 tmp, A0_DEST_CHANNEL_X, 0,
534 swizzle(src0,X,X,X,X), 0, 0);
535
536 i915_emit_arith( p,
537 A0_MUL,
538 tmp, A0_DEST_CHANNEL_X, 0,
539 tmp, src1, 0);
540
541
542 i915_emit_arith( p,
543 A0_EXP,
544 get_result_vector( p, inst ),
545 flags, 0,
546 swizzle(tmp,X,X,X,X), 0, 0);
547
548 break;
549
550 case OPCODE_RCP:
551 src0 = src_vector( p, &inst->SrcReg[0], program);
552
553 i915_emit_arith( p,
554 A0_RCP,
555 get_result_vector( p, inst ),
556 get_result_flags( inst ), 0,
557 swizzle(src0,X,X,X,X), 0, 0);
558 break;
559
560 case OPCODE_RSQ:
561
562 src0 = src_vector( p, &inst->SrcReg[0], program);
563
564 i915_emit_arith( p,
565 A0_RSQ,
566 get_result_vector( p, inst ),
567 get_result_flags( inst ), 0,
568 swizzle(src0,X,X,X,X), 0, 0);
569 break;
570
571 case OPCODE_SCS:
572 src0 = src_vector( p, &inst->SrcReg[0], program);
573 tmp = i915_get_utemp( p );
574
575 /*
576 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
577 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
578 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
579 * scs.x = DP4 t1, sin_constants
580 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
581 * scs.y = DP4 t1, cos_constants
582 */
583 i915_emit_arith( p,
584 A0_MUL,
585 tmp, A0_DEST_CHANNEL_XY, 0,
586 swizzle(src0, X,X,ONE,ONE),
587 swizzle(src0, X,ONE,ONE,ONE), 0);
588
589 i915_emit_arith( p,
590 A0_MUL,
591 tmp, A0_DEST_CHANNEL_ALL, 0,
592 swizzle(tmp, X,Y,X,Y),
593 swizzle(tmp, X,X,ONE,ONE), 0);
594
595 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
596 GLuint tmp1;
597
598 if (inst->DstReg.WriteMask & WRITEMASK_X)
599 tmp1 = i915_get_utemp( p );
600 else
601 tmp1 = tmp;
602
603 i915_emit_arith( p,
604 A0_MUL,
605 tmp1, A0_DEST_CHANNEL_ALL, 0,
606 swizzle(tmp, X,Y,Y,W),
607 swizzle(tmp, X,Z,ONE,ONE), 0);
608
609 i915_emit_arith( p,
610 A0_DP4,
611 get_result_vector( p, inst ),
612 A0_DEST_CHANNEL_Y, 0,
613 swizzle(tmp1, W,Z,Y,X),
614 i915_emit_const4fv( p, sin_constants ), 0);
615 }
616
617 if (inst->DstReg.WriteMask & WRITEMASK_X) {
618 i915_emit_arith( p,
619 A0_MUL,
620 tmp, A0_DEST_CHANNEL_XYZ, 0,
621 swizzle(tmp, X,X,Z,ONE),
622 swizzle(tmp, Z,ONE,ONE,ONE), 0);
623
624 i915_emit_arith( p,
625 A0_DP4,
626 get_result_vector( p, inst ),
627 A0_DEST_CHANNEL_X, 0,
628 swizzle(tmp, ONE,Z,Y,X),
629 i915_emit_const4fv( p, cos_constants ), 0);
630 }
631 break;
632
633 case OPCODE_SGE:
634 EMIT_2ARG_ARITH( A0_SGE );
635 break;
636
637 case OPCODE_SIN:
638 src0 = src_vector( p, &inst->SrcReg[0], program);
639 tmp = i915_get_utemp( p );
640
641 i915_emit_arith( p,
642 A0_MUL,
643 tmp, A0_DEST_CHANNEL_X, 0,
644 src0,
645 i915_emit_const1f(p, 1.0/(M_PI * 2)),
646 0);
647
648 i915_emit_arith( p,
649 A0_MOD,
650 tmp, A0_DEST_CHANNEL_X, 0,
651 tmp,
652 0, 0 );
653
654 /* By choosing different taylor constants, could get rid of this mul:
655 */
656 i915_emit_arith( p,
657 A0_MUL,
658 tmp, A0_DEST_CHANNEL_X, 0,
659 tmp,
660 i915_emit_const1f(p, (M_PI * 2)),
661 0);
662
663 /*
664 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
665 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
666 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
667 * result = DP4 t1.wzyx, sin_constants
668 */
669 i915_emit_arith( p,
670 A0_MUL,
671 tmp, A0_DEST_CHANNEL_XY, 0,
672 swizzle(tmp, X,X,ONE,ONE),
673 swizzle(tmp, X,ONE,ONE,ONE), 0);
674
675 i915_emit_arith( p,
676 A0_MUL,
677 tmp, A0_DEST_CHANNEL_ALL, 0,
678 swizzle(tmp, X,Y,X,Y),
679 swizzle(tmp, X,X,ONE,ONE), 0);
680
681 i915_emit_arith( p,
682 A0_MUL,
683 tmp, A0_DEST_CHANNEL_ALL, 0,
684 swizzle(tmp, X,Y,Y,W),
685 swizzle(tmp, X,Z,ONE,ONE), 0);
686
687 i915_emit_arith( p,
688 A0_DP4,
689 get_result_vector( p, inst ),
690 get_result_flags( inst ), 0,
691 swizzle(tmp, W, Z, Y, X ),
692 i915_emit_const4fv( p, sin_constants ), 0);
693 break;
694
695 case OPCODE_SLT:
696 EMIT_2ARG_ARITH( A0_SLT );
697 break;
698
699 case OPCODE_SUB:
700 src0 = src_vector( p, &inst->SrcReg[0], program);
701 src1 = src_vector( p, &inst->SrcReg[1], program);
702
703 i915_emit_arith( p,
704 A0_ADD,
705 get_result_vector( p, inst ),
706 get_result_flags( inst ), 0,
707 src0, negate(src1, 1,1,1,1), 0);
708 break;
709
710 case OPCODE_SWZ:
711 EMIT_1ARG_ARITH( A0_MOV ); /* extended swizzle handled natively */
712 break;
713
714 case OPCODE_TEX:
715 EMIT_TEX( T0_TEXLD );
716 break;
717
718 case OPCODE_TXB:
719 EMIT_TEX( T0_TEXLDB );
720 break;
721
722 case OPCODE_TXP:
723 EMIT_TEX( T0_TEXLDP );
724 break;
725
726 case OPCODE_XPD:
727 /* Cross product:
728 * result.x = src0.y * src1.z - src0.z * src1.y;
729 * result.y = src0.z * src1.x - src0.x * src1.z;
730 * result.z = src0.x * src1.y - src0.y * src1.x;
731 * result.w = undef;
732 */
733 src0 = src_vector( p, &inst->SrcReg[0], program);
734 src1 = src_vector( p, &inst->SrcReg[1], program);
735 tmp = i915_get_utemp( p );
736
737 i915_emit_arith( p,
738 A0_MUL,
739 tmp, A0_DEST_CHANNEL_ALL, 0,
740 swizzle(src0,Z,X,Y,ONE),
741 swizzle(src1,Y,Z,X,ONE), 0);
742
743 i915_emit_arith( p,
744 A0_MAD,
745 get_result_vector( p, inst ),
746 get_result_flags( inst ), 0,
747 swizzle(src0,Y,Z,X,ONE),
748 swizzle(src1,Z,X,Y,ONE),
749 negate(tmp,1,1,1,0));
750 break;
751
752 case OPCODE_END:
753 return;
754
755 default:
756 i915_program_error( p, "bad opcode" );
757 return;
758 }
759
760 inst++;
761 i915_release_utemps( p );
762 }
763 }
764
765 /* Rather than trying to intercept and jiggle depth writes during
766 * emit, just move the value into its correct position at the end of
767 * the program:
768 */
769 static void fixup_depth_write( struct i915_fragment_program *p )
770 {
771 if (p->depth_written) {
772 GLuint depth = UREG(REG_TYPE_OD, 0);
773
774 i915_emit_arith( p,
775 A0_MOV,
776 depth, A0_DEST_CHANNEL_W, 0,
777 swizzle(depth,X,Y,Z,Z),
778 0, 0);
779 }
780 }
781
782
/* InputsRead bit for texture coordinate set n, counted from FRAG_BIT_TEX0. */
#define FRAG_BIT_TEX(n) ((FRAG_BIT_TEX0) << (n))
784
785
786 static void check_wpos( struct i915_fragment_program *p )
787 {
788 GLuint inputs = p->FragProg.Base.InputsRead;
789 GLint i;
790
791 p->wpos_tex = -1;
792
793 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
794 if (inputs & FRAG_BIT_TEX(i))
795 continue;
796 else if (inputs & FRAG_BIT_WPOS) {
797 p->wpos_tex = i;
798 inputs &= ~FRAG_BIT_WPOS;
799 }
800 }
801
802 if (inputs & FRAG_BIT_WPOS) {
803 i915_program_error(p, "No free texcoord for wpos value");
804 }
805 }
806
807
808 static void translate_program( struct i915_fragment_program *p )
809 {
810 i915ContextPtr i915 = I915_CONTEXT(p->ctx);
811
812 i915_init_program( i915, p );
813 check_wpos( p );
814 upload_program( p );
815 fixup_depth_write( p );
816 i915_fini_program( p );
817
818 p->translated = 1;
819 }
820
821
822 static void track_params( struct i915_fragment_program *p )
823 {
824 GLint i;
825
826 if (p->nr_params)
827 _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
828
829 for (i = 0; i < p->nr_params; i++) {
830 GLint reg = p->param[i].reg;
831 COPY_4V( p->constant[reg], p->param[i].values );
832 }
833
834 p->params_uptodate = 1;
835 p->on_hardware = 0; /* overkill */
836 }
837
838
839 static void i915BindProgram( GLcontext *ctx,
840 GLenum target,
841 struct gl_program *prog )
842 {
843 if (target == GL_FRAGMENT_PROGRAM_ARB) {
844 i915ContextPtr i915 = I915_CONTEXT(ctx);
845 struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
846
847 if (i915->current_program == p)
848 return;
849
850 if (i915->current_program) {
851 i915->current_program->on_hardware = 0;
852 i915->current_program->params_uptodate = 0;
853 }
854
855 i915->current_program = p;
856
857 assert(p->on_hardware == 0);
858 assert(p->params_uptodate == 0);
859
860 /* Hack: make sure fog is correctly enabled according to this
861 * fragment program's fog options.
862 */
863 ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB,
864 ctx->FragmentProgram.Enabled );
865 }
866 }
867
868 static struct gl_program *i915NewProgram( GLcontext *ctx,
869 GLenum target,
870 GLuint id )
871 {
872 switch (target) {
873 case GL_VERTEX_PROGRAM_ARB:
874 return _mesa_init_vertex_program( ctx, CALLOC_STRUCT(gl_vertex_program),
875 target, id );
876
877 case GL_FRAGMENT_PROGRAM_ARB: {
878 struct i915_fragment_program *prog = CALLOC_STRUCT(i915_fragment_program);
879 if (prog) {
880 i915_init_program( I915_CONTEXT(ctx), prog );
881
882 return _mesa_init_fragment_program( ctx, &prog->FragProg,
883 target, id );
884 }
885 else
886 return NULL;
887 }
888
889 default:
890 /* Just fallback:
891 */
892 return _mesa_new_program( ctx, target, id );
893 }
894 }
895
896 static void i915DeleteProgram( GLcontext *ctx,
897 struct gl_program *prog )
898 {
899 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
900 i915ContextPtr i915 = I915_CONTEXT(ctx);
901 struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
902
903 if (i915->current_program == p)
904 i915->current_program = 0;
905 }
906
907 _mesa_delete_program( ctx, prog );
908 }
909
910
911 static GLboolean i915IsProgramNative( GLcontext *ctx,
912 GLenum target,
913 struct gl_program *prog )
914 {
915 if (target == GL_FRAGMENT_PROGRAM_ARB) {
916 struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
917
918 if (!p->translated)
919 translate_program( p );
920
921 return !p->error;
922 }
923 else
924 return GL_TRUE;
925 }
926
927 static void i915ProgramStringNotify( GLcontext *ctx,
928 GLenum target,
929 struct gl_program *prog )
930 {
931 if (target == GL_FRAGMENT_PROGRAM_ARB) {
932 struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
933 p->translated = 0;
934
935 /* Hack: make sure fog is correctly enabled according to this
936 * fragment program's fog options.
937 */
938 ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB,
939 ctx->FragmentProgram.Enabled );
940 }
941
942 _tnl_program_string(ctx, target, prog);
943 }
944
945
946 void i915ValidateFragmentProgram( i915ContextPtr i915 )
947 {
948 GLcontext *ctx = &i915->intel.ctx;
949 intelContextPtr intel = INTEL_CONTEXT(ctx);
950 TNLcontext *tnl = TNL_CONTEXT(ctx);
951 struct vertex_buffer *VB = &tnl->vb;
952
953 struct i915_fragment_program *p =
954 (struct i915_fragment_program *)ctx->FragmentProgram._Current;
955
956 const GLuint inputsRead = p->FragProg.Base.InputsRead;
957 GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
958 GLuint s2 = S2_TEXCOORD_NONE;
959 int i, offset = 0;
960
961 if (i915->current_program != p)
962 {
963 if (i915->current_program) {
964 i915->current_program->on_hardware = 0;
965 i915->current_program->params_uptodate = 0;
966 }
967
968 i915->current_program = p;
969 }
970
971
972 /* Important:
973 */
974 VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
975
976 if (!p->translated)
977 translate_program( p );
978
979 intel->vertex_attr_count = 0;
980 intel->wpos_offset = 0;
981 intel->wpos_size = 0;
982 intel->coloroffset = 0;
983 intel->specoffset = 0;
984
985 if (inputsRead & FRAG_BITS_TEX_ANY) {
986 EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
987 }
988 else {
989 EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 );
990 }
991
992 if (inputsRead & FRAG_BIT_COL0) {
993 intel->coloroffset = offset / 4;
994 EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4 );
995 }
996
997 if ((inputsRead & (FRAG_BIT_COL1|FRAG_BIT_FOGC)) ||
998 i915->vertex_fog != I915_FOG_NONE) {
999
1000 if (inputsRead & FRAG_BIT_COL1) {
1001 intel->specoffset = offset / 4;
1002 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3 );
1003 }
1004 else
1005 EMIT_PAD(3);
1006
1007 if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE)
1008 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1 );
1009 else
1010 EMIT_PAD( 1 );
1011 }
1012
1013 #if 0
1014 if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
1015 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4 );
1016 }
1017 #endif
1018
1019 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1020 if (inputsRead & FRAG_BIT_TEX(i)) {
1021 int sz = VB->TexCoordPtr[i]->size;
1022
1023 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1024 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
1025
1026 EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 );
1027 }
1028 else if (i == p->wpos_tex) {
1029
1030 /* If WPOS is required, duplicate the XYZ position data in an
1031 * unused texture coordinate:
1032 */
1033 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1034 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
1035
1036 intel->wpos_offset = offset;
1037 intel->wpos_size = 3 * sizeof(GLuint);
1038
1039 EMIT_PAD( intel->wpos_size );
1040 }
1041 }
1042
1043 if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
1044 s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
1045
1046 I915_STATECHANGE( i915, I915_UPLOAD_CTX );
1047
1048 /* Must do this *after* statechange, so as not to affect
1049 * buffered vertices reliant on the old state:
1050 */
1051 intel->vertex_size = _tnl_install_attrs( &intel->ctx,
1052 intel->vertex_attrs,
1053 intel->vertex_attr_count,
1054 intel->ViewportMatrix.m, 0 );
1055
1056 intel->vertex_size >>= 2;
1057
1058 i915->state.Ctx[I915_CTXREG_LIS2] = s2;
1059 i915->state.Ctx[I915_CTXREG_LIS4] = s4;
1060
1061 assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size ));
1062 }
1063
1064 if (!p->params_uptodate)
1065 track_params( p );
1066
1067 if (!p->on_hardware)
1068 i915_upload_program( i915, p );
1069 }
1070
1071 void i915InitFragProgFuncs( struct dd_function_table *functions )
1072 {
1073 functions->BindProgram = i915BindProgram;
1074 functions->NewProgram = i915NewProgram;
1075 functions->DeleteProgram = i915DeleteProgram;
1076 functions->IsProgramNative = i915IsProgramNative;
1077 functions->ProgramStringNotify = i915ProgramStringNotify;
1078 }