merge current trunk into vbo branch
[mesa.git] / src / mesa / drivers / dri / i915tex / i915_fragprog.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "glheader.h"
29 #include "macros.h"
30 #include "enums.h"
31
32 #include "tnl/tnl.h"
33 #include "tnl/t_context.h"
34 #include "intel_batchbuffer.h"
35
36 #include "i915_reg.h"
37 #include "i915_context.h"
38 #include "i915_program.h"
39
40 #include "program_instruction.h"
41 #include "program.h"
42
43
44
45 /* 1, -1/3!, 1/5!, -1/7! */
46 static const GLfloat sin_constants[4] = { 1.0,
47 -1.0 / (3 * 2 * 1),
48 1.0 / (5 * 4 * 3 * 2 * 1),
49 -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
50 };
51
52 /* 1, -1/2!, 1/4!, -1/6! */
53 static const GLfloat cos_constants[4] = { 1.0,
54 -1.0 / (2 * 1),
55 1.0 / (4 * 3 * 2 * 1),
56 -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
57 };
58
59 /**
60 * Retrieve a ureg for the given source register. Will emit
61 * constants, apply swizzling and negation as needed.
62 */
63 static GLuint
64 src_vector(struct i915_fragment_program *p,
65 const struct prog_src_register *source,
66 const struct gl_fragment_program *program)
67 {
68 GLuint src;
69
70 switch (source->File) {
71
72 /* Registers:
73 */
74 case PROGRAM_TEMPORARY:
75 if (source->Index >= I915_MAX_TEMPORARY) {
76 i915_program_error(p, "Exceeded max temporary reg");
77 return 0;
78 }
79 src = UREG(REG_TYPE_R, source->Index);
80 break;
81 case PROGRAM_INPUT:
82 switch (source->Index) {
83 case FRAG_ATTRIB_WPOS:
84 src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
85 break;
86 case FRAG_ATTRIB_COL0:
87 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
88 break;
89 case FRAG_ATTRIB_COL1:
90 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
91 src = swizzle(src, X, Y, Z, ONE);
92 break;
93 case FRAG_ATTRIB_FOGC:
94 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
95 src = swizzle(src, W, W, W, W);
96 break;
97 case FRAG_ATTRIB_TEX0:
98 case FRAG_ATTRIB_TEX1:
99 case FRAG_ATTRIB_TEX2:
100 case FRAG_ATTRIB_TEX3:
101 case FRAG_ATTRIB_TEX4:
102 case FRAG_ATTRIB_TEX5:
103 case FRAG_ATTRIB_TEX6:
104 case FRAG_ATTRIB_TEX7:
105 src = i915_emit_decl(p, REG_TYPE_T,
106 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
107 D0_CHANNEL_ALL);
108 break;
109
110 default:
111 i915_program_error(p, "Bad source->Index");
112 return 0;
113 }
114 break;
115
116 /* Various paramters and env values. All emitted to
117 * hardware as program constants.
118 */
119 case PROGRAM_LOCAL_PARAM:
120 src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]);
121 break;
122
123 case PROGRAM_ENV_PARAM:
124 src =
125 i915_emit_param4fv(p,
126 p->ctx->FragmentProgram.Parameters[source->
127 Index]);
128 break;
129
130 case PROGRAM_STATE_VAR:
131 case PROGRAM_NAMED_PARAM:
132 src =
133 i915_emit_param4fv(p,
134 program->Base.Parameters->ParameterValues[source->
135 Index]);
136 break;
137
138 default:
139 i915_program_error(p, "Bad source->File");
140 return 0;
141 }
142
143 src = swizzle(src,
144 GET_SWZ(source->Swizzle, 0),
145 GET_SWZ(source->Swizzle, 1),
146 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));
147
148 if (source->NegateBase)
149 src = negate(src,
150 GET_BIT(source->NegateBase, 0),
151 GET_BIT(source->NegateBase, 1),
152 GET_BIT(source->NegateBase, 2),
153 GET_BIT(source->NegateBase, 3));
154
155 return src;
156 }
157
158
159 static GLuint
160 get_result_vector(struct i915_fragment_program *p,
161 const struct prog_instruction *inst)
162 {
163 switch (inst->DstReg.File) {
164 case PROGRAM_OUTPUT:
165 switch (inst->DstReg.Index) {
166 case FRAG_RESULT_COLR:
167 return UREG(REG_TYPE_OC, 0);
168 case FRAG_RESULT_DEPR:
169 p->depth_written = 1;
170 return UREG(REG_TYPE_OD, 0);
171 default:
172 i915_program_error(p, "Bad inst->DstReg.Index");
173 return 0;
174 }
175 case PROGRAM_TEMPORARY:
176 return UREG(REG_TYPE_R, inst->DstReg.Index);
177 default:
178 i915_program_error(p, "Bad inst->DstReg.File");
179 return 0;
180 }
181 }
182
183 static GLuint
184 get_result_flags(const struct prog_instruction *inst)
185 {
186 GLuint flags = 0;
187
188 if (inst->SaturateMode == SATURATE_ZERO_ONE)
189 flags |= A0_DEST_SATURATE;
190 if (inst->DstReg.WriteMask & WRITEMASK_X)
191 flags |= A0_DEST_CHANNEL_X;
192 if (inst->DstReg.WriteMask & WRITEMASK_Y)
193 flags |= A0_DEST_CHANNEL_Y;
194 if (inst->DstReg.WriteMask & WRITEMASK_Z)
195 flags |= A0_DEST_CHANNEL_Z;
196 if (inst->DstReg.WriteMask & WRITEMASK_W)
197 flags |= A0_DEST_CHANNEL_W;
198
199 return flags;
200 }
201
202 static GLuint
203 translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
204 {
205 switch (bit) {
206 case TEXTURE_1D_INDEX:
207 return D0_SAMPLE_TYPE_2D;
208 case TEXTURE_2D_INDEX:
209 return D0_SAMPLE_TYPE_2D;
210 case TEXTURE_RECT_INDEX:
211 return D0_SAMPLE_TYPE_2D;
212 case TEXTURE_3D_INDEX:
213 return D0_SAMPLE_TYPE_VOLUME;
214 case TEXTURE_CUBE_INDEX:
215 return D0_SAMPLE_TYPE_CUBE;
216 default:
217 i915_program_error(p, "TexSrcBit");
218 return 0;
219 }
220 }
221
/* Helper macros for the opcode switch in upload_program().
 *
 * Both expand in a scope that must provide the locals `p' (the
 * i915_fragment_program being built), `inst' (the current instruction)
 * and `program' (the gl_fragment_program being translated).
 */

/* Emit a texture instruction OP: declare the sampler for the
 * instruction's texture unit/target, fetch the coordinate from SrcReg[0]
 * and write to the instruction's destination.
 */
#define EMIT_TEX( OP )                                                  \
do {                                                                    \
   GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget );      \
   GLuint sampler = i915_emit_decl( p, REG_TYPE_S,                      \
                                    inst->TexSrcUnit, dim );            \
   GLuint coord = src_vector( p, &inst->SrcReg[0], program );           \
   /* Texel lookup */                                                   \
                                                                        \
   i915_emit_texld( p,                                                  \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ),                           \
                    sampler,                                            \
                    coord,                                              \
                    (OP) );                                             \
} while (0)

/* Emit arithmetic instruction OP using the first N (0..3) source
 * operands of the instruction; unused operand slots are passed as 0.
 */
#define EMIT_ARITH( OP, N )                                             \
do {                                                                    \
   i915_emit_arith( p,                                                  \
                    (OP),                                               \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ), 0,                        \
                    ((N) < 1) ? 0 : src_vector( p, &inst->SrcReg[0], program ), \
                    ((N) < 2) ? 0 : src_vector( p, &inst->SrcReg[1], program ), \
                    ((N) < 3) ? 0 : src_vector( p, &inst->SrcReg[2], program ) ); \
} while (0)

#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
252
253
254 /* Possible concerns:
255 *
256 * SIN, COS -- could use another taylor step?
257 * LIT -- results seem a little different to sw mesa
258 * LOG -- different to mesa on negative numbers, but this is conformant.
259 *
260 * Parse failures -- Mesa doesn't currently give a good indication
261 * internally whether a particular program string parsed or not. This
262 * can lead to confusion -- hopefully we cope with it ok now.
263 *
264 */
265 static void
266 upload_program(struct i915_fragment_program *p)
267 {
268 const struct gl_fragment_program *program =
269 p->ctx->FragmentProgram._Current;
270 const struct prog_instruction *inst = program->Base.Instructions;
271
272 /* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */
273
274 /* Is this a parse-failed program? Ensure a valid program is
275 * loaded, as the flagging of an error isn't sufficient to stop
276 * this being uploaded to hardware.
277 */
278 if (inst[0].Opcode == OPCODE_END) {
279 GLuint tmp = i915_get_utemp(p);
280 i915_emit_arith(p,
281 A0_MOV,
282 UREG(REG_TYPE_OC, 0),
283 A0_DEST_CHANNEL_ALL, 0,
284 swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
285 return;
286 }
287
288 while (1) {
289 GLuint src0, src1, src2, flags;
290 GLuint tmp = 0;
291
292 switch (inst->Opcode) {
293 case OPCODE_ABS:
294 src0 = src_vector(p, &inst->SrcReg[0], program);
295 i915_emit_arith(p,
296 A0_MAX,
297 get_result_vector(p, inst),
298 get_result_flags(inst), 0,
299 src0, negate(src0, 1, 1, 1, 1), 0);
300 break;
301
302 case OPCODE_ADD:
303 EMIT_2ARG_ARITH(A0_ADD);
304 break;
305
306 case OPCODE_CMP:
307 src0 = src_vector(p, &inst->SrcReg[0], program);
308 src1 = src_vector(p, &inst->SrcReg[1], program);
309 src2 = src_vector(p, &inst->SrcReg[2], program);
310 i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */
311 break;
312
313 case OPCODE_COS:
314 src0 = src_vector(p, &inst->SrcReg[0], program);
315 tmp = i915_get_utemp(p);
316
317 i915_emit_arith(p,
318 A0_MUL,
319 tmp, A0_DEST_CHANNEL_X, 0,
320 src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0);
321
322 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
323
324 /* By choosing different taylor constants, could get rid of this mul:
325 */
326 i915_emit_arith(p,
327 A0_MUL,
328 tmp, A0_DEST_CHANNEL_X, 0,
329 tmp, i915_emit_const1f(p, (M_PI * 2)), 0);
330
331 /*
332 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
333 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
334 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
335 * result = DP4 t0, cos_constants
336 */
337 i915_emit_arith(p,
338 A0_MUL,
339 tmp, A0_DEST_CHANNEL_XY, 0,
340 swizzle(tmp, X, X, ONE, ONE),
341 swizzle(tmp, X, ONE, ONE, ONE), 0);
342
343 i915_emit_arith(p,
344 A0_MUL,
345 tmp, A0_DEST_CHANNEL_XYZ, 0,
346 swizzle(tmp, X, Y, X, ONE),
347 swizzle(tmp, X, X, ONE, ONE), 0);
348
349 i915_emit_arith(p,
350 A0_MUL,
351 tmp, A0_DEST_CHANNEL_XYZ, 0,
352 swizzle(tmp, X, X, Z, ONE),
353 swizzle(tmp, Z, ONE, ONE, ONE), 0);
354
355 i915_emit_arith(p,
356 A0_DP4,
357 get_result_vector(p, inst),
358 get_result_flags(inst), 0,
359 swizzle(tmp, ONE, Z, Y, X),
360 i915_emit_const4fv(p, cos_constants), 0);
361
362 break;
363
364 case OPCODE_DP3:
365 EMIT_2ARG_ARITH(A0_DP3);
366 break;
367
368 case OPCODE_DP4:
369 EMIT_2ARG_ARITH(A0_DP4);
370 break;
371
372 case OPCODE_DPH:
373 src0 = src_vector(p, &inst->SrcReg[0], program);
374 src1 = src_vector(p, &inst->SrcReg[1], program);
375
376 i915_emit_arith(p,
377 A0_DP4,
378 get_result_vector(p, inst),
379 get_result_flags(inst), 0,
380 swizzle(src0, X, Y, Z, ONE), src1, 0);
381 break;
382
383 case OPCODE_DST:
384 src0 = src_vector(p, &inst->SrcReg[0], program);
385 src1 = src_vector(p, &inst->SrcReg[1], program);
386
387 /* result[0] = 1 * 1;
388 * result[1] = a[1] * b[1];
389 * result[2] = a[2] * 1;
390 * result[3] = 1 * b[3];
391 */
392 i915_emit_arith(p,
393 A0_MUL,
394 get_result_vector(p, inst),
395 get_result_flags(inst), 0,
396 swizzle(src0, ONE, Y, Z, ONE),
397 swizzle(src1, ONE, Y, ONE, W), 0);
398 break;
399
400 case OPCODE_EX2:
401 src0 = src_vector(p, &inst->SrcReg[0], program);
402
403 i915_emit_arith(p,
404 A0_EXP,
405 get_result_vector(p, inst),
406 get_result_flags(inst), 0,
407 swizzle(src0, X, X, X, X), 0, 0);
408 break;
409
410 case OPCODE_FLR:
411 EMIT_1ARG_ARITH(A0_FLR);
412 break;
413
414 case OPCODE_FRC:
415 EMIT_1ARG_ARITH(A0_FRC);
416 break;
417
418 case OPCODE_KIL:
419 src0 = src_vector(p, &inst->SrcReg[0], program);
420 tmp = i915_get_utemp(p);
421
422 i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
423 0, src0, T0_TEXKILL);
424 break;
425
426 case OPCODE_LG2:
427 src0 = src_vector(p, &inst->SrcReg[0], program);
428
429 i915_emit_arith(p,
430 A0_LOG,
431 get_result_vector(p, inst),
432 get_result_flags(inst), 0,
433 swizzle(src0, X, X, X, X), 0, 0);
434 break;
435
436 case OPCODE_LIT:
437 src0 = src_vector(p, &inst->SrcReg[0], program);
438 tmp = i915_get_utemp(p);
439
440 /* tmp = max( a.xyzw, a.00zw )
441 * XXX: Clamp tmp.w to -128..128
442 * tmp.y = log(tmp.y)
443 * tmp.y = tmp.w * tmp.y
444 * tmp.y = exp(tmp.y)
445 * result = cmp (a.11-x1, a.1x01, a.1xy1 )
446 */
447 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
448 src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
449
450 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
451 swizzle(tmp, Y, Y, Y, Y), 0, 0);
452
453 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
454 swizzle(tmp, ZERO, Y, ZERO, ZERO),
455 swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
456
457 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
458 swizzle(tmp, Y, Y, Y, Y), 0, 0);
459
460 i915_emit_arith(p, A0_CMP,
461 get_result_vector(p, inst),
462 get_result_flags(inst), 0,
463 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
464 swizzle(tmp, ONE, X, ZERO, ONE),
465 swizzle(tmp, ONE, X, Y, ONE));
466
467 break;
468
469 case OPCODE_LRP:
470 src0 = src_vector(p, &inst->SrcReg[0], program);
471 src1 = src_vector(p, &inst->SrcReg[1], program);
472 src2 = src_vector(p, &inst->SrcReg[2], program);
473 flags = get_result_flags(inst);
474 tmp = i915_get_utemp(p);
475
476 /* b*a + c*(1-a)
477 *
478 * b*a + c - ca
479 *
480 * tmp = b*a + c,
481 * result = (-c)*a + tmp
482 */
483 i915_emit_arith(p, A0_MAD, tmp,
484 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
485
486 i915_emit_arith(p, A0_MAD,
487 get_result_vector(p, inst),
488 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
489 break;
490
491 case OPCODE_MAD:
492 EMIT_3ARG_ARITH(A0_MAD);
493 break;
494
495 case OPCODE_MAX:
496 EMIT_2ARG_ARITH(A0_MAX);
497 break;
498
499 case OPCODE_MIN:
500 src0 = src_vector(p, &inst->SrcReg[0], program);
501 src1 = src_vector(p, &inst->SrcReg[1], program);
502 tmp = i915_get_utemp(p);
503 flags = get_result_flags(inst);
504
505 i915_emit_arith(p,
506 A0_MAX,
507 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
508 negate(src0, 1, 1, 1, 1),
509 negate(src1, 1, 1, 1, 1), 0);
510
511 i915_emit_arith(p,
512 A0_MOV,
513 get_result_vector(p, inst),
514 flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
515 break;
516
517 case OPCODE_MOV:
518 EMIT_1ARG_ARITH(A0_MOV);
519 break;
520
521 case OPCODE_MUL:
522 EMIT_2ARG_ARITH(A0_MUL);
523 break;
524
525 case OPCODE_POW:
526 src0 = src_vector(p, &inst->SrcReg[0], program);
527 src1 = src_vector(p, &inst->SrcReg[1], program);
528 tmp = i915_get_utemp(p);
529 flags = get_result_flags(inst);
530
531 /* XXX: masking on intermediate values, here and elsewhere.
532 */
533 i915_emit_arith(p,
534 A0_LOG,
535 tmp, A0_DEST_CHANNEL_X, 0,
536 swizzle(src0, X, X, X, X), 0, 0);
537
538 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
539
540
541 i915_emit_arith(p,
542 A0_EXP,
543 get_result_vector(p, inst),
544 flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
545
546 break;
547
548 case OPCODE_RCP:
549 src0 = src_vector(p, &inst->SrcReg[0], program);
550
551 i915_emit_arith(p,
552 A0_RCP,
553 get_result_vector(p, inst),
554 get_result_flags(inst), 0,
555 swizzle(src0, X, X, X, X), 0, 0);
556 break;
557
558 case OPCODE_RSQ:
559
560 src0 = src_vector(p, &inst->SrcReg[0], program);
561
562 i915_emit_arith(p,
563 A0_RSQ,
564 get_result_vector(p, inst),
565 get_result_flags(inst), 0,
566 swizzle(src0, X, X, X, X), 0, 0);
567 break;
568
569 case OPCODE_SCS:
570 src0 = src_vector(p, &inst->SrcReg[0], program);
571 tmp = i915_get_utemp(p);
572
573 /*
574 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
575 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
576 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
577 * scs.x = DP4 t1, sin_constants
578 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
579 * scs.y = DP4 t1, cos_constants
580 */
581 i915_emit_arith(p,
582 A0_MUL,
583 tmp, A0_DEST_CHANNEL_XY, 0,
584 swizzle(src0, X, X, ONE, ONE),
585 swizzle(src0, X, ONE, ONE, ONE), 0);
586
587 i915_emit_arith(p,
588 A0_MUL,
589 tmp, A0_DEST_CHANNEL_ALL, 0,
590 swizzle(tmp, X, Y, X, Y),
591 swizzle(tmp, X, X, ONE, ONE), 0);
592
593 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
594 GLuint tmp1;
595
596 if (inst->DstReg.WriteMask & WRITEMASK_X)
597 tmp1 = i915_get_utemp(p);
598 else
599 tmp1 = tmp;
600
601 i915_emit_arith(p,
602 A0_MUL,
603 tmp1, A0_DEST_CHANNEL_ALL, 0,
604 swizzle(tmp, X, Y, Y, W),
605 swizzle(tmp, X, Z, ONE, ONE), 0);
606
607 i915_emit_arith(p,
608 A0_DP4,
609 get_result_vector(p, inst),
610 A0_DEST_CHANNEL_Y, 0,
611 swizzle(tmp1, W, Z, Y, X),
612 i915_emit_const4fv(p, sin_constants), 0);
613 }
614
615 if (inst->DstReg.WriteMask & WRITEMASK_X) {
616 i915_emit_arith(p,
617 A0_MUL,
618 tmp, A0_DEST_CHANNEL_XYZ, 0,
619 swizzle(tmp, X, X, Z, ONE),
620 swizzle(tmp, Z, ONE, ONE, ONE), 0);
621
622 i915_emit_arith(p,
623 A0_DP4,
624 get_result_vector(p, inst),
625 A0_DEST_CHANNEL_X, 0,
626 swizzle(tmp, ONE, Z, Y, X),
627 i915_emit_const4fv(p, cos_constants), 0);
628 }
629 break;
630
631 case OPCODE_SGE:
632 EMIT_2ARG_ARITH(A0_SGE);
633 break;
634
635 case OPCODE_SIN:
636 src0 = src_vector(p, &inst->SrcReg[0], program);
637 tmp = i915_get_utemp(p);
638
639 i915_emit_arith(p,
640 A0_MUL,
641 tmp, A0_DEST_CHANNEL_X, 0,
642 src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0);
643
644 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
645
646 /* By choosing different taylor constants, could get rid of this mul:
647 */
648 i915_emit_arith(p,
649 A0_MUL,
650 tmp, A0_DEST_CHANNEL_X, 0,
651 tmp, i915_emit_const1f(p, (M_PI * 2)), 0);
652
653 /*
654 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
655 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
656 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
657 * result = DP4 t1.wzyx, sin_constants
658 */
659 i915_emit_arith(p,
660 A0_MUL,
661 tmp, A0_DEST_CHANNEL_XY, 0,
662 swizzle(tmp, X, X, ONE, ONE),
663 swizzle(tmp, X, ONE, ONE, ONE), 0);
664
665 i915_emit_arith(p,
666 A0_MUL,
667 tmp, A0_DEST_CHANNEL_ALL, 0,
668 swizzle(tmp, X, Y, X, Y),
669 swizzle(tmp, X, X, ONE, ONE), 0);
670
671 i915_emit_arith(p,
672 A0_MUL,
673 tmp, A0_DEST_CHANNEL_ALL, 0,
674 swizzle(tmp, X, Y, Y, W),
675 swizzle(tmp, X, Z, ONE, ONE), 0);
676
677 i915_emit_arith(p,
678 A0_DP4,
679 get_result_vector(p, inst),
680 get_result_flags(inst), 0,
681 swizzle(tmp, W, Z, Y, X),
682 i915_emit_const4fv(p, sin_constants), 0);
683 break;
684
685 case OPCODE_SLT:
686 EMIT_2ARG_ARITH(A0_SLT);
687 break;
688
689 case OPCODE_SUB:
690 src0 = src_vector(p, &inst->SrcReg[0], program);
691 src1 = src_vector(p, &inst->SrcReg[1], program);
692
693 i915_emit_arith(p,
694 A0_ADD,
695 get_result_vector(p, inst),
696 get_result_flags(inst), 0,
697 src0, negate(src1, 1, 1, 1, 1), 0);
698 break;
699
700 case OPCODE_SWZ:
701 EMIT_1ARG_ARITH(A0_MOV); /* extended swizzle handled natively */
702 break;
703
704 case OPCODE_TEX:
705 EMIT_TEX(T0_TEXLD);
706 break;
707
708 case OPCODE_TXB:
709 EMIT_TEX(T0_TEXLDB);
710 break;
711
712 case OPCODE_TXP:
713 EMIT_TEX(T0_TEXLDP);
714 break;
715
716 case OPCODE_XPD:
717 /* Cross product:
718 * result.x = src0.y * src1.z - src0.z * src1.y;
719 * result.y = src0.z * src1.x - src0.x * src1.z;
720 * result.z = src0.x * src1.y - src0.y * src1.x;
721 * result.w = undef;
722 */
723 src0 = src_vector(p, &inst->SrcReg[0], program);
724 src1 = src_vector(p, &inst->SrcReg[1], program);
725 tmp = i915_get_utemp(p);
726
727 i915_emit_arith(p,
728 A0_MUL,
729 tmp, A0_DEST_CHANNEL_ALL, 0,
730 swizzle(src0, Z, X, Y, ONE),
731 swizzle(src1, Y, Z, X, ONE), 0);
732
733 i915_emit_arith(p,
734 A0_MAD,
735 get_result_vector(p, inst),
736 get_result_flags(inst), 0,
737 swizzle(src0, Y, Z, X, ONE),
738 swizzle(src1, Z, X, Y, ONE),
739 negate(tmp, 1, 1, 1, 0));
740 break;
741
742 case OPCODE_END:
743 return;
744
745 default:
746 i915_program_error(p, "bad opcode");
747 return;
748 }
749
750 inst++;
751 i915_release_utemps(p);
752 }
753 }
754
755 /* Rather than trying to intercept and jiggle depth writes during
756 * emit, just move the value into its correct position at the end of
757 * the program:
758 */
759 static void
760 fixup_depth_write(struct i915_fragment_program *p)
761 {
762 if (p->depth_written) {
763 GLuint depth = UREG(REG_TYPE_OD, 0);
764
765 i915_emit_arith(p,
766 A0_MOV,
767 depth, A0_DEST_CHANNEL_W, 0,
768 swizzle(depth, X, Y, Z, Z), 0, 0);
769 }
770 }
771
772
773 #define FRAG_BIT_TEX(n) (FRAG_BIT_TEX0 << (n))
774
775
776 static void
777 check_wpos(struct i915_fragment_program *p)
778 {
779 GLuint inputs = p->FragProg.Base.InputsRead;
780 GLint i;
781
782 p->wpos_tex = -1;
783
784 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
785 if (inputs & FRAG_BIT_TEX(i))
786 continue;
787 else if (inputs & FRAG_BIT_WPOS) {
788 p->wpos_tex = i;
789 inputs &= ~FRAG_BIT_WPOS;
790 }
791 }
792
793 if (inputs & FRAG_BIT_WPOS) {
794 i915_program_error(p, "No free texcoord for wpos value");
795 }
796 }
797
798
799 static void
800 translate_program(struct i915_fragment_program *p)
801 {
802 struct i915_context *i915 = I915_CONTEXT(p->ctx);
803
804 i915_init_program(i915, p);
805 check_wpos(p);
806 upload_program(p);
807 fixup_depth_write(p);
808 i915_fini_program(p);
809
810 p->translated = 1;
811 }
812
813
814 static void
815 track_params(struct i915_fragment_program *p)
816 {
817 GLint i;
818
819 if (p->nr_params)
820 _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
821
822 for (i = 0; i < p->nr_params; i++) {
823 GLint reg = p->param[i].reg;
824 COPY_4V(p->constant[reg], p->param[i].values);
825 }
826
827 p->params_uptodate = 1;
828 p->on_hardware = 0; /* overkill */
829 }
830
831
832 static void
833 i915BindProgram(GLcontext * ctx, GLenum target, struct gl_program *prog)
834 {
835 if (target == GL_FRAGMENT_PROGRAM_ARB) {
836 struct i915_context *i915 = I915_CONTEXT(ctx);
837 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
838
839 if (i915->current_program == p)
840 return;
841
842 if (i915->current_program) {
843 i915->current_program->on_hardware = 0;
844 i915->current_program->params_uptodate = 0;
845 }
846
847 i915->current_program = p;
848
849 assert(p->on_hardware == 0);
850 assert(p->params_uptodate == 0);
851
852 /* Hack: make sure fog is correctly enabled according to this
853 * fragment program's fog options.
854 */
855 ctx->Driver.Enable(ctx, GL_FRAGMENT_PROGRAM_ARB,
856 ctx->FragmentProgram.Enabled);
857 }
858 }
859
860 static struct gl_program *
861 i915NewProgram(GLcontext * ctx, GLenum target, GLuint id)
862 {
863 switch (target) {
864 case GL_VERTEX_PROGRAM_ARB:
865 return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program),
866 target, id);
867
868 case GL_FRAGMENT_PROGRAM_ARB:{
869 struct i915_fragment_program *prog =
870 CALLOC_STRUCT(i915_fragment_program);
871 if (prog) {
872 i915_init_program(I915_CONTEXT(ctx), prog);
873
874 return _mesa_init_fragment_program(ctx, &prog->FragProg,
875 target, id);
876 }
877 else
878 return NULL;
879 }
880
881 default:
882 /* Just fallback:
883 */
884 return _mesa_new_program(ctx, target, id);
885 }
886 }
887
888 static void
889 i915DeleteProgram(GLcontext * ctx, struct gl_program *prog)
890 {
891 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
892 struct i915_context *i915 = I915_CONTEXT(ctx);
893 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
894
895 if (i915->current_program == p)
896 i915->current_program = 0;
897 }
898
899 _mesa_delete_program(ctx, prog);
900 }
901
902
903 static GLboolean
904 i915IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
905 {
906 if (target == GL_FRAGMENT_PROGRAM_ARB) {
907 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
908
909 if (!p->translated)
910 translate_program(p);
911
912 return !p->error;
913 }
914 else
915 return GL_TRUE;
916 }
917
918 static void
919 i915ProgramStringNotify(GLcontext * ctx,
920 GLenum target, struct gl_program *prog)
921 {
922 if (target == GL_FRAGMENT_PROGRAM_ARB) {
923 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
924 p->translated = 0;
925
926 /* Hack: make sure fog is correctly enabled according to this
927 * fragment program's fog options.
928 */
929 ctx->Driver.Enable(ctx, GL_FRAGMENT_PROGRAM_ARB,
930 ctx->FragmentProgram.Enabled);
931 }
932
933 _tnl_program_string(ctx, target, prog);
934 }
935
936
937 void
938 i915ValidateFragmentProgram(struct i915_context *i915)
939 {
940 GLcontext *ctx = &i915->intel.ctx;
941 struct intel_context *intel = intel_context(ctx);
942 TNLcontext *tnl = TNL_CONTEXT(ctx);
943 struct vertex_buffer *VB = &tnl->vb;
944
945 struct i915_fragment_program *p =
946 (struct i915_fragment_program *) ctx->FragmentProgram._Current;
947
948 const GLuint inputsRead = p->FragProg.Base.InputsRead;
949 GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
950 GLuint s2 = S2_TEXCOORD_NONE;
951 int i, offset = 0;
952
953 if (i915->current_program != p) {
954 if (i915->current_program) {
955 i915->current_program->on_hardware = 0;
956 i915->current_program->params_uptodate = 0;
957 }
958
959 i915->current_program = p;
960 }
961
962
963 /* Important:
964 */
965 VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
966
967 if (!p->translated)
968 translate_program(p);
969
970 intel->vertex_attr_count = 0;
971 intel->wpos_offset = 0;
972 intel->wpos_size = 0;
973 intel->coloroffset = 0;
974 intel->specoffset = 0;
975
976 if (inputsRead & FRAG_BITS_TEX_ANY) {
977 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
978 }
979 else {
980 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
981 }
982
983 if (inputsRead & FRAG_BIT_COL0) {
984 intel->coloroffset = offset / 4;
985 EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
986 }
987
988 if ((inputsRead & (FRAG_BIT_COL1 | FRAG_BIT_FOGC)) ||
989 i915->vertex_fog != I915_FOG_NONE) {
990
991 if (inputsRead & FRAG_BIT_COL1) {
992 intel->specoffset = offset / 4;
993 EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3);
994 }
995 else
996 EMIT_PAD(3);
997
998 if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE)
999 EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1);
1000 else
1001 EMIT_PAD(1);
1002 }
1003
1004 #if 0
1005 if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
1006 EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
1007 }
1008 #endif
1009
1010 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1011 if (inputsRead & FRAG_BIT_TEX(i)) {
1012 int sz = VB->TexCoordPtr[i]->size;
1013
1014 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1015 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
1016
1017 EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
1018 }
1019 else if (i == p->wpos_tex) {
1020
1021 /* If WPOS is required, duplicate the XYZ position data in an
1022 * unused texture coordinate:
1023 */
1024 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1025 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
1026
1027 intel->wpos_offset = offset;
1028 intel->wpos_size = 3 * sizeof(GLuint);
1029
1030 EMIT_PAD(intel->wpos_size);
1031 }
1032 }
1033
1034 if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
1035 s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
1036 int k;
1037
1038 I915_STATECHANGE(i915, I915_UPLOAD_CTX);
1039
1040 /* Must do this *after* statechange, so as not to affect
1041 * buffered vertices reliant on the old state:
1042 */
1043 intel->vertex_size = _tnl_install_attrs(&intel->ctx,
1044 intel->vertex_attrs,
1045 intel->vertex_attr_count,
1046 intel->ViewportMatrix.m, 0);
1047
1048 intel->vertex_size >>= 2;
1049
1050 i915->state.Ctx[I915_CTXREG_LIS2] = s2;
1051 i915->state.Ctx[I915_CTXREG_LIS4] = s4;
1052
1053 k = intel->vtbl.check_vertex_size(intel, intel->vertex_size);
1054 assert(k);
1055 }
1056
1057 if (!p->params_uptodate)
1058 track_params(p);
1059
1060 if (!p->on_hardware)
1061 i915_upload_program(i915, p);
1062 }
1063
1064 void
1065 i915InitFragProgFuncs(struct dd_function_table *functions)
1066 {
1067 functions->BindProgram = i915BindProgram;
1068 functions->NewProgram = i915NewProgram;
1069 functions->DeleteProgram = i915DeleteProgram;
1070 functions->IsProgramNative = i915IsProgramNative;
1071 functions->ProgramStringNotify = i915ProgramStringNotify;
1072 }