don't use driDrawable information directly, don't resize at makecurrent
[mesa.git] / src / mesa / drivers / dri / i915tex / i915_fragprog.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "glheader.h"
29 #include "macros.h"
30 #include "enums.h"
31
32 #include "tnl/tnl.h"
33 #include "tnl/t_context.h"
34 #include "intel_batchbuffer.h"
35
36 #include "i915_reg.h"
37 #include "i915_context.h"
38 #include "i915_program.h"
39
40 #include "prog_instruction.h"
41 #include "prog_parameter.h"
42 #include "program.h"
43 #include "programopt.h"
44
45
46
47 /* 1, -1/3!, 1/5!, -1/7! */
48 static const GLfloat sin_constants[4] = { 1.0,
49 -1.0 / (3 * 2 * 1),
50 1.0 / (5 * 4 * 3 * 2 * 1),
51 -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
52 };
53
54 /* 1, -1/2!, 1/4!, -1/6! */
55 static const GLfloat cos_constants[4] = { 1.0,
56 -1.0 / (2 * 1),
57 1.0 / (4 * 3 * 2 * 1),
58 -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
59 };
60
61 /**
62 * Retrieve a ureg for the given source register. Will emit
63 * constants, apply swizzling and negation as needed.
64 */
65 static GLuint
66 src_vector(struct i915_fragment_program *p,
67 const struct prog_src_register *source,
68 const struct gl_fragment_program *program)
69 {
70 GLuint src;
71
72 switch (source->File) {
73
74 /* Registers:
75 */
76 case PROGRAM_TEMPORARY:
77 if (source->Index >= I915_MAX_TEMPORARY) {
78 i915_program_error(p, "Exceeded max temporary reg");
79 return 0;
80 }
81 src = UREG(REG_TYPE_R, source->Index);
82 break;
83 case PROGRAM_INPUT:
84 switch (source->Index) {
85 case FRAG_ATTRIB_WPOS:
86 src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
87 break;
88 case FRAG_ATTRIB_COL0:
89 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
90 break;
91 case FRAG_ATTRIB_COL1:
92 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
93 src = swizzle(src, X, Y, Z, ONE);
94 break;
95 case FRAG_ATTRIB_FOGC:
96 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
97 src = swizzle(src, W, W, W, W);
98 break;
99 case FRAG_ATTRIB_TEX0:
100 case FRAG_ATTRIB_TEX1:
101 case FRAG_ATTRIB_TEX2:
102 case FRAG_ATTRIB_TEX3:
103 case FRAG_ATTRIB_TEX4:
104 case FRAG_ATTRIB_TEX5:
105 case FRAG_ATTRIB_TEX6:
106 case FRAG_ATTRIB_TEX7:
107 src = i915_emit_decl(p, REG_TYPE_T,
108 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
109 D0_CHANNEL_ALL);
110 break;
111
112 default:
113 i915_program_error(p, "Bad source->Index");
114 return 0;
115 }
116 break;
117
118 /* Various paramters and env values. All emitted to
119 * hardware as program constants.
120 */
121 case PROGRAM_LOCAL_PARAM:
122 src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]);
123 break;
124
125 case PROGRAM_ENV_PARAM:
126 src =
127 i915_emit_param4fv(p,
128 p->ctx->FragmentProgram.Parameters[source->
129 Index]);
130 break;
131
132 case PROGRAM_CONSTANT:
133 case PROGRAM_STATE_VAR:
134 case PROGRAM_NAMED_PARAM:
135 src =
136 i915_emit_param4fv(p,
137 program->Base.Parameters->ParameterValues[source->
138 Index]);
139 break;
140
141 default:
142 i915_program_error(p, "Bad source->File");
143 return 0;
144 }
145
146 src = swizzle(src,
147 GET_SWZ(source->Swizzle, 0),
148 GET_SWZ(source->Swizzle, 1),
149 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));
150
151 if (source->NegateBase)
152 src = negate(src,
153 GET_BIT(source->NegateBase, 0),
154 GET_BIT(source->NegateBase, 1),
155 GET_BIT(source->NegateBase, 2),
156 GET_BIT(source->NegateBase, 3));
157
158 return src;
159 }
160
161
162 static GLuint
163 get_result_vector(struct i915_fragment_program *p,
164 const struct prog_instruction *inst)
165 {
166 switch (inst->DstReg.File) {
167 case PROGRAM_OUTPUT:
168 switch (inst->DstReg.Index) {
169 case FRAG_RESULT_COLR:
170 return UREG(REG_TYPE_OC, 0);
171 case FRAG_RESULT_DEPR:
172 p->depth_written = 1;
173 return UREG(REG_TYPE_OD, 0);
174 default:
175 i915_program_error(p, "Bad inst->DstReg.Index");
176 return 0;
177 }
178 case PROGRAM_TEMPORARY:
179 return UREG(REG_TYPE_R, inst->DstReg.Index);
180 default:
181 i915_program_error(p, "Bad inst->DstReg.File");
182 return 0;
183 }
184 }
185
186 static GLuint
187 get_result_flags(const struct prog_instruction *inst)
188 {
189 GLuint flags = 0;
190
191 if (inst->SaturateMode == SATURATE_ZERO_ONE)
192 flags |= A0_DEST_SATURATE;
193 if (inst->DstReg.WriteMask & WRITEMASK_X)
194 flags |= A0_DEST_CHANNEL_X;
195 if (inst->DstReg.WriteMask & WRITEMASK_Y)
196 flags |= A0_DEST_CHANNEL_Y;
197 if (inst->DstReg.WriteMask & WRITEMASK_Z)
198 flags |= A0_DEST_CHANNEL_Z;
199 if (inst->DstReg.WriteMask & WRITEMASK_W)
200 flags |= A0_DEST_CHANNEL_W;
201
202 return flags;
203 }
204
205 static GLuint
206 translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
207 {
208 switch (bit) {
209 case TEXTURE_1D_INDEX:
210 return D0_SAMPLE_TYPE_2D;
211 case TEXTURE_2D_INDEX:
212 return D0_SAMPLE_TYPE_2D;
213 case TEXTURE_RECT_INDEX:
214 return D0_SAMPLE_TYPE_2D;
215 case TEXTURE_3D_INDEX:
216 return D0_SAMPLE_TYPE_VOLUME;
217 case TEXTURE_CUBE_INDEX:
218 return D0_SAMPLE_TYPE_CUBE;
219 default:
220 i915_program_error(p, "TexSrcBit");
221 return 0;
222 }
223 }
224
/*
 * Helpers for upload_program().  Both macros expect `p`, `inst` and
 * `program` to be in scope at the point of expansion.
 */

/* Declare the sampler for the instruction's texture unit, fetch the
 * coordinate operand and emit texture instruction OP into the
 * instruction's destination.
 */
#define EMIT_TEX( OP )                                                    \
do {                                                                      \
   const GLuint tex_dim_ =                                                \
      translate_tex_src_target( p, inst->TexSrcTarget );                  \
   const GLuint tex_sampler_ =                                            \
      i915_emit_decl( p, REG_TYPE_S, inst->TexSrcUnit, tex_dim_ );        \
   const GLuint tex_coord_ =                                              \
      src_vector( p, &inst->SrcReg[0], program );                         \
                                                                          \
   /* Texel lookup */                                                     \
   i915_emit_texld( p,                                                    \
                    get_result_vector( p, inst ),                         \
                    get_result_flags( inst ),                             \
                    tex_sampler_,                                         \
                    tex_coord_,                                           \
                    OP );                                                 \
} while (0)

/* Emit arithmetic instruction OP taking the first N source operands of
 * the instruction; unused operand slots are passed as 0.
 */
#define EMIT_ARITH( OP, N )                                               \
do {                                                                      \
   i915_emit_arith( p,                                                    \
                    OP,                                                   \
                    get_result_vector( p, inst ),                         \
                    get_result_flags( inst ), 0,                          \
                    ((N) >= 1) ? src_vector( p, &inst->SrcReg[0], program ) : 0, \
                    ((N) >= 2) ? src_vector( p, &inst->SrcReg[1], program ) : 0, \
                    ((N) >= 3) ? src_vector( p, &inst->SrcReg[2], program ) : 0 ); \
} while (0)

#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
255
256
257 /* Possible concerns:
258 *
259 * SIN, COS -- could use another taylor step?
260 * LIT -- results seem a little different to sw mesa
261 * LOG -- different to mesa on negative numbers, but this is conformant.
262 *
263 * Parse failures -- Mesa doesn't currently give a good indication
264 * internally whether a particular program string parsed or not. This
265 * can lead to confusion -- hopefully we cope with it ok now.
266 *
267 */
268 static void
269 upload_program(struct i915_fragment_program *p)
270 {
271 const struct gl_fragment_program *program =
272 p->ctx->FragmentProgram._Current;
273 const struct prog_instruction *inst = program->Base.Instructions;
274
275 /* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */
276
277 /* Is this a parse-failed program? Ensure a valid program is
278 * loaded, as the flagging of an error isn't sufficient to stop
279 * this being uploaded to hardware.
280 */
281 if (inst[0].Opcode == OPCODE_END) {
282 GLuint tmp = i915_get_utemp(p);
283 i915_emit_arith(p,
284 A0_MOV,
285 UREG(REG_TYPE_OC, 0),
286 A0_DEST_CHANNEL_ALL, 0,
287 swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
288 return;
289 }
290
291 while (1) {
292 GLuint src0, src1, src2, flags;
293 GLuint tmp = 0;
294
295 switch (inst->Opcode) {
296 case OPCODE_ABS:
297 src0 = src_vector(p, &inst->SrcReg[0], program);
298 i915_emit_arith(p,
299 A0_MAX,
300 get_result_vector(p, inst),
301 get_result_flags(inst), 0,
302 src0, negate(src0, 1, 1, 1, 1), 0);
303 break;
304
305 case OPCODE_ADD:
306 EMIT_2ARG_ARITH(A0_ADD);
307 break;
308
309 case OPCODE_CMP:
310 src0 = src_vector(p, &inst->SrcReg[0], program);
311 src1 = src_vector(p, &inst->SrcReg[1], program);
312 src2 = src_vector(p, &inst->SrcReg[2], program);
313 i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */
314 break;
315
316 case OPCODE_COS:
317 src0 = src_vector(p, &inst->SrcReg[0], program);
318 tmp = i915_get_utemp(p);
319
320 i915_emit_arith(p,
321 A0_MUL,
322 tmp, A0_DEST_CHANNEL_X, 0,
323 src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0);
324
325 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
326
327 /* By choosing different taylor constants, could get rid of this mul:
328 */
329 i915_emit_arith(p,
330 A0_MUL,
331 tmp, A0_DEST_CHANNEL_X, 0,
332 tmp, i915_emit_const1f(p, (M_PI * 2)), 0);
333
334 /*
335 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
336 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
337 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
338 * result = DP4 t0, cos_constants
339 */
340 i915_emit_arith(p,
341 A0_MUL,
342 tmp, A0_DEST_CHANNEL_XY, 0,
343 swizzle(tmp, X, X, ONE, ONE),
344 swizzle(tmp, X, ONE, ONE, ONE), 0);
345
346 i915_emit_arith(p,
347 A0_MUL,
348 tmp, A0_DEST_CHANNEL_XYZ, 0,
349 swizzle(tmp, X, Y, X, ONE),
350 swizzle(tmp, X, X, ONE, ONE), 0);
351
352 i915_emit_arith(p,
353 A0_MUL,
354 tmp, A0_DEST_CHANNEL_XYZ, 0,
355 swizzle(tmp, X, X, Z, ONE),
356 swizzle(tmp, Z, ONE, ONE, ONE), 0);
357
358 i915_emit_arith(p,
359 A0_DP4,
360 get_result_vector(p, inst),
361 get_result_flags(inst), 0,
362 swizzle(tmp, ONE, Z, Y, X),
363 i915_emit_const4fv(p, cos_constants), 0);
364
365 break;
366
367 case OPCODE_DP3:
368 EMIT_2ARG_ARITH(A0_DP3);
369 break;
370
371 case OPCODE_DP4:
372 EMIT_2ARG_ARITH(A0_DP4);
373 break;
374
375 case OPCODE_DPH:
376 src0 = src_vector(p, &inst->SrcReg[0], program);
377 src1 = src_vector(p, &inst->SrcReg[1], program);
378
379 i915_emit_arith(p,
380 A0_DP4,
381 get_result_vector(p, inst),
382 get_result_flags(inst), 0,
383 swizzle(src0, X, Y, Z, ONE), src1, 0);
384 break;
385
386 case OPCODE_DST:
387 src0 = src_vector(p, &inst->SrcReg[0], program);
388 src1 = src_vector(p, &inst->SrcReg[1], program);
389
390 /* result[0] = 1 * 1;
391 * result[1] = a[1] * b[1];
392 * result[2] = a[2] * 1;
393 * result[3] = 1 * b[3];
394 */
395 i915_emit_arith(p,
396 A0_MUL,
397 get_result_vector(p, inst),
398 get_result_flags(inst), 0,
399 swizzle(src0, ONE, Y, Z, ONE),
400 swizzle(src1, ONE, Y, ONE, W), 0);
401 break;
402
403 case OPCODE_EX2:
404 src0 = src_vector(p, &inst->SrcReg[0], program);
405
406 i915_emit_arith(p,
407 A0_EXP,
408 get_result_vector(p, inst),
409 get_result_flags(inst), 0,
410 swizzle(src0, X, X, X, X), 0, 0);
411 break;
412
413 case OPCODE_FLR:
414 EMIT_1ARG_ARITH(A0_FLR);
415 break;
416
417 case OPCODE_FRC:
418 EMIT_1ARG_ARITH(A0_FRC);
419 break;
420
421 case OPCODE_KIL:
422 src0 = src_vector(p, &inst->SrcReg[0], program);
423 tmp = i915_get_utemp(p);
424
425 i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
426 0, src0, T0_TEXKILL);
427 break;
428
429 case OPCODE_LG2:
430 src0 = src_vector(p, &inst->SrcReg[0], program);
431
432 i915_emit_arith(p,
433 A0_LOG,
434 get_result_vector(p, inst),
435 get_result_flags(inst), 0,
436 swizzle(src0, X, X, X, X), 0, 0);
437 break;
438
439 case OPCODE_LIT:
440 src0 = src_vector(p, &inst->SrcReg[0], program);
441 tmp = i915_get_utemp(p);
442
443 /* tmp = max( a.xyzw, a.00zw )
444 * XXX: Clamp tmp.w to -128..128
445 * tmp.y = log(tmp.y)
446 * tmp.y = tmp.w * tmp.y
447 * tmp.y = exp(tmp.y)
448 * result = cmp (a.11-x1, a.1x01, a.1xy1 )
449 */
450 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
451 src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
452
453 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
454 swizzle(tmp, Y, Y, Y, Y), 0, 0);
455
456 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
457 swizzle(tmp, ZERO, Y, ZERO, ZERO),
458 swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
459
460 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
461 swizzle(tmp, Y, Y, Y, Y), 0, 0);
462
463 i915_emit_arith(p, A0_CMP,
464 get_result_vector(p, inst),
465 get_result_flags(inst), 0,
466 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
467 swizzle(tmp, ONE, X, ZERO, ONE),
468 swizzle(tmp, ONE, X, Y, ONE));
469
470 break;
471
472 case OPCODE_LRP:
473 src0 = src_vector(p, &inst->SrcReg[0], program);
474 src1 = src_vector(p, &inst->SrcReg[1], program);
475 src2 = src_vector(p, &inst->SrcReg[2], program);
476 flags = get_result_flags(inst);
477 tmp = i915_get_utemp(p);
478
479 /* b*a + c*(1-a)
480 *
481 * b*a + c - ca
482 *
483 * tmp = b*a + c,
484 * result = (-c)*a + tmp
485 */
486 i915_emit_arith(p, A0_MAD, tmp,
487 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
488
489 i915_emit_arith(p, A0_MAD,
490 get_result_vector(p, inst),
491 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
492 break;
493
494 case OPCODE_MAD:
495 EMIT_3ARG_ARITH(A0_MAD);
496 break;
497
498 case OPCODE_MAX:
499 EMIT_2ARG_ARITH(A0_MAX);
500 break;
501
502 case OPCODE_MIN:
503 src0 = src_vector(p, &inst->SrcReg[0], program);
504 src1 = src_vector(p, &inst->SrcReg[1], program);
505 tmp = i915_get_utemp(p);
506 flags = get_result_flags(inst);
507
508 i915_emit_arith(p,
509 A0_MAX,
510 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
511 negate(src0, 1, 1, 1, 1),
512 negate(src1, 1, 1, 1, 1), 0);
513
514 i915_emit_arith(p,
515 A0_MOV,
516 get_result_vector(p, inst),
517 flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
518 break;
519
520 case OPCODE_MOV:
521 EMIT_1ARG_ARITH(A0_MOV);
522 break;
523
524 case OPCODE_MUL:
525 EMIT_2ARG_ARITH(A0_MUL);
526 break;
527
528 case OPCODE_POW:
529 src0 = src_vector(p, &inst->SrcReg[0], program);
530 src1 = src_vector(p, &inst->SrcReg[1], program);
531 tmp = i915_get_utemp(p);
532 flags = get_result_flags(inst);
533
534 /* XXX: masking on intermediate values, here and elsewhere.
535 */
536 i915_emit_arith(p,
537 A0_LOG,
538 tmp, A0_DEST_CHANNEL_X, 0,
539 swizzle(src0, X, X, X, X), 0, 0);
540
541 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
542
543
544 i915_emit_arith(p,
545 A0_EXP,
546 get_result_vector(p, inst),
547 flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
548
549 break;
550
551 case OPCODE_RCP:
552 src0 = src_vector(p, &inst->SrcReg[0], program);
553
554 i915_emit_arith(p,
555 A0_RCP,
556 get_result_vector(p, inst),
557 get_result_flags(inst), 0,
558 swizzle(src0, X, X, X, X), 0, 0);
559 break;
560
561 case OPCODE_RSQ:
562
563 src0 = src_vector(p, &inst->SrcReg[0], program);
564
565 i915_emit_arith(p,
566 A0_RSQ,
567 get_result_vector(p, inst),
568 get_result_flags(inst), 0,
569 swizzle(src0, X, X, X, X), 0, 0);
570 break;
571
572 case OPCODE_SCS:
573 src0 = src_vector(p, &inst->SrcReg[0], program);
574 tmp = i915_get_utemp(p);
575
576 /*
577 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
578 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
579 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
580 * scs.x = DP4 t1, sin_constants
581 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
582 * scs.y = DP4 t1, cos_constants
583 */
584 i915_emit_arith(p,
585 A0_MUL,
586 tmp, A0_DEST_CHANNEL_XY, 0,
587 swizzle(src0, X, X, ONE, ONE),
588 swizzle(src0, X, ONE, ONE, ONE), 0);
589
590 i915_emit_arith(p,
591 A0_MUL,
592 tmp, A0_DEST_CHANNEL_ALL, 0,
593 swizzle(tmp, X, Y, X, Y),
594 swizzle(tmp, X, X, ONE, ONE), 0);
595
596 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
597 GLuint tmp1;
598
599 if (inst->DstReg.WriteMask & WRITEMASK_X)
600 tmp1 = i915_get_utemp(p);
601 else
602 tmp1 = tmp;
603
604 i915_emit_arith(p,
605 A0_MUL,
606 tmp1, A0_DEST_CHANNEL_ALL, 0,
607 swizzle(tmp, X, Y, Y, W),
608 swizzle(tmp, X, Z, ONE, ONE), 0);
609
610 i915_emit_arith(p,
611 A0_DP4,
612 get_result_vector(p, inst),
613 A0_DEST_CHANNEL_Y, 0,
614 swizzle(tmp1, W, Z, Y, X),
615 i915_emit_const4fv(p, sin_constants), 0);
616 }
617
618 if (inst->DstReg.WriteMask & WRITEMASK_X) {
619 i915_emit_arith(p,
620 A0_MUL,
621 tmp, A0_DEST_CHANNEL_XYZ, 0,
622 swizzle(tmp, X, X, Z, ONE),
623 swizzle(tmp, Z, ONE, ONE, ONE), 0);
624
625 i915_emit_arith(p,
626 A0_DP4,
627 get_result_vector(p, inst),
628 A0_DEST_CHANNEL_X, 0,
629 swizzle(tmp, ONE, Z, Y, X),
630 i915_emit_const4fv(p, cos_constants), 0);
631 }
632 break;
633
634 case OPCODE_SGE:
635 EMIT_2ARG_ARITH(A0_SGE);
636 break;
637
638 case OPCODE_SIN:
639 src0 = src_vector(p, &inst->SrcReg[0], program);
640 tmp = i915_get_utemp(p);
641
642 i915_emit_arith(p,
643 A0_MUL,
644 tmp, A0_DEST_CHANNEL_X, 0,
645 src0, i915_emit_const1f(p, 1.0 / (M_PI * 2)), 0);
646
647 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
648
649 /* By choosing different taylor constants, could get rid of this mul:
650 */
651 i915_emit_arith(p,
652 A0_MUL,
653 tmp, A0_DEST_CHANNEL_X, 0,
654 tmp, i915_emit_const1f(p, (M_PI * 2)), 0);
655
656 /*
657 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
658 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
659 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
660 * result = DP4 t1.wzyx, sin_constants
661 */
662 i915_emit_arith(p,
663 A0_MUL,
664 tmp, A0_DEST_CHANNEL_XY, 0,
665 swizzle(tmp, X, X, ONE, ONE),
666 swizzle(tmp, X, ONE, ONE, ONE), 0);
667
668 i915_emit_arith(p,
669 A0_MUL,
670 tmp, A0_DEST_CHANNEL_ALL, 0,
671 swizzle(tmp, X, Y, X, Y),
672 swizzle(tmp, X, X, ONE, ONE), 0);
673
674 i915_emit_arith(p,
675 A0_MUL,
676 tmp, A0_DEST_CHANNEL_ALL, 0,
677 swizzle(tmp, X, Y, Y, W),
678 swizzle(tmp, X, Z, ONE, ONE), 0);
679
680 i915_emit_arith(p,
681 A0_DP4,
682 get_result_vector(p, inst),
683 get_result_flags(inst), 0,
684 swizzle(tmp, W, Z, Y, X),
685 i915_emit_const4fv(p, sin_constants), 0);
686 break;
687
688 case OPCODE_SLT:
689 EMIT_2ARG_ARITH(A0_SLT);
690 break;
691
692 case OPCODE_SUB:
693 src0 = src_vector(p, &inst->SrcReg[0], program);
694 src1 = src_vector(p, &inst->SrcReg[1], program);
695
696 i915_emit_arith(p,
697 A0_ADD,
698 get_result_vector(p, inst),
699 get_result_flags(inst), 0,
700 src0, negate(src1, 1, 1, 1, 1), 0);
701 break;
702
703 case OPCODE_SWZ:
704 EMIT_1ARG_ARITH(A0_MOV); /* extended swizzle handled natively */
705 break;
706
707 case OPCODE_TEX:
708 EMIT_TEX(T0_TEXLD);
709 break;
710
711 case OPCODE_TXB:
712 EMIT_TEX(T0_TEXLDB);
713 break;
714
715 case OPCODE_TXP:
716 EMIT_TEX(T0_TEXLDP);
717 break;
718
719 case OPCODE_XPD:
720 /* Cross product:
721 * result.x = src0.y * src1.z - src0.z * src1.y;
722 * result.y = src0.z * src1.x - src0.x * src1.z;
723 * result.z = src0.x * src1.y - src0.y * src1.x;
724 * result.w = undef;
725 */
726 src0 = src_vector(p, &inst->SrcReg[0], program);
727 src1 = src_vector(p, &inst->SrcReg[1], program);
728 tmp = i915_get_utemp(p);
729
730 i915_emit_arith(p,
731 A0_MUL,
732 tmp, A0_DEST_CHANNEL_ALL, 0,
733 swizzle(src0, Z, X, Y, ONE),
734 swizzle(src1, Y, Z, X, ONE), 0);
735
736 i915_emit_arith(p,
737 A0_MAD,
738 get_result_vector(p, inst),
739 get_result_flags(inst), 0,
740 swizzle(src0, Y, Z, X, ONE),
741 swizzle(src1, Z, X, Y, ONE),
742 negate(tmp, 1, 1, 1, 0));
743 break;
744
745 case OPCODE_END:
746 return;
747
748 default:
749 i915_program_error(p, "bad opcode");
750 return;
751 }
752
753 inst++;
754 i915_release_utemps(p);
755 }
756 }
757
758 /* Rather than trying to intercept and jiggle depth writes during
759 * emit, just move the value into its correct position at the end of
760 * the program:
761 */
762 static void
763 fixup_depth_write(struct i915_fragment_program *p)
764 {
765 if (p->depth_written) {
766 GLuint depth = UREG(REG_TYPE_OD, 0);
767
768 i915_emit_arith(p,
769 A0_MOV,
770 depth, A0_DEST_CHANNEL_W, 0,
771 swizzle(depth, X, Y, Z, Z), 0, 0);
772 }
773 }
774
775
776 static void
777 check_wpos(struct i915_fragment_program *p)
778 {
779 GLuint inputs = p->FragProg.Base.InputsRead;
780 GLint i;
781
782 p->wpos_tex = -1;
783
784 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
785 if (inputs & FRAG_BIT_TEX(i))
786 continue;
787 else if (inputs & FRAG_BIT_WPOS) {
788 p->wpos_tex = i;
789 inputs &= ~FRAG_BIT_WPOS;
790 }
791 }
792
793 if (inputs & FRAG_BIT_WPOS) {
794 i915_program_error(p, "No free texcoord for wpos value");
795 }
796 }
797
798
799 static void
800 translate_program(struct i915_fragment_program *p)
801 {
802 struct i915_context *i915 = I915_CONTEXT(p->ctx);
803
804 i915_init_program(i915, p);
805 check_wpos(p);
806 upload_program(p);
807 fixup_depth_write(p);
808 i915_fini_program(p);
809
810 p->translated = 1;
811 }
812
813
814 static void
815 track_params(struct i915_fragment_program *p)
816 {
817 GLint i;
818
819 if (p->nr_params)
820 _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
821
822 for (i = 0; i < p->nr_params; i++) {
823 GLint reg = p->param[i].reg;
824 COPY_4V(p->constant[reg], p->param[i].values);
825 }
826
827 p->params_uptodate = 1;
828 p->on_hardware = 0; /* overkill */
829 }
830
831
832 static void
833 i915BindProgram(GLcontext * ctx, GLenum target, struct gl_program *prog)
834 {
835 if (target == GL_FRAGMENT_PROGRAM_ARB) {
836 struct i915_context *i915 = I915_CONTEXT(ctx);
837 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
838
839 if (i915->current_program == p)
840 return;
841
842 if (i915->current_program) {
843 i915->current_program->on_hardware = 0;
844 i915->current_program->params_uptodate = 0;
845 }
846
847 i915->current_program = p;
848
849 assert(p->on_hardware == 0);
850 assert(p->params_uptodate == 0);
851
852 }
853 }
854
855 static struct gl_program *
856 i915NewProgram(GLcontext * ctx, GLenum target, GLuint id)
857 {
858 switch (target) {
859 case GL_VERTEX_PROGRAM_ARB:
860 return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program),
861 target, id);
862
863 case GL_FRAGMENT_PROGRAM_ARB:{
864 struct i915_fragment_program *prog =
865 CALLOC_STRUCT(i915_fragment_program);
866 if (prog) {
867 i915_init_program(I915_CONTEXT(ctx), prog);
868
869 return _mesa_init_fragment_program(ctx, &prog->FragProg,
870 target, id);
871 }
872 else
873 return NULL;
874 }
875
876 default:
877 /* Just fallback:
878 */
879 return _mesa_new_program(ctx, target, id);
880 }
881 }
882
883 static void
884 i915DeleteProgram(GLcontext * ctx, struct gl_program *prog)
885 {
886 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
887 struct i915_context *i915 = I915_CONTEXT(ctx);
888 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
889
890 if (i915->current_program == p)
891 i915->current_program = 0;
892 }
893
894 _mesa_delete_program(ctx, prog);
895 }
896
897
898 static GLboolean
899 i915IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
900 {
901 if (target == GL_FRAGMENT_PROGRAM_ARB) {
902 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
903
904 if (!p->translated)
905 translate_program(p);
906
907 return !p->error;
908 }
909 else
910 return GL_TRUE;
911 }
912
913 static void
914 i915ProgramStringNotify(GLcontext * ctx,
915 GLenum target, struct gl_program *prog)
916 {
917 if (target == GL_FRAGMENT_PROGRAM_ARB) {
918 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
919 p->translated = 0;
920
921 /* Hack: make sure fog is correctly enabled according to this
922 * fragment program's fog options.
923 */
924 if (p->FragProg.FogOption) {
925 /* add extra instructions to do fog, then turn off FogOption field */
926 _mesa_append_fog_code(ctx, &p->FragProg);
927 p->FragProg.FogOption = GL_NONE;
928 }
929 }
930
931 _tnl_program_string(ctx, target, prog);
932 }
933
934
935 void
936 i915ValidateFragmentProgram(struct i915_context *i915)
937 {
938 GLcontext *ctx = &i915->intel.ctx;
939 struct intel_context *intel = intel_context(ctx);
940 TNLcontext *tnl = TNL_CONTEXT(ctx);
941 struct vertex_buffer *VB = &tnl->vb;
942
943 struct i915_fragment_program *p =
944 (struct i915_fragment_program *) ctx->FragmentProgram._Current;
945
946 const GLuint inputsRead = p->FragProg.Base.InputsRead;
947 GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
948 GLuint s2 = S2_TEXCOORD_NONE;
949 int i, offset = 0;
950
951 if (i915->current_program != p) {
952 if (i915->current_program) {
953 i915->current_program->on_hardware = 0;
954 i915->current_program->params_uptodate = 0;
955 }
956
957 i915->current_program = p;
958 }
959
960
961 /* Important:
962 */
963 VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
964
965 if (!p->translated)
966 translate_program(p);
967
968 intel->vertex_attr_count = 0;
969 intel->wpos_offset = 0;
970 intel->wpos_size = 0;
971 intel->coloroffset = 0;
972 intel->specoffset = 0;
973
974 if (inputsRead & FRAG_BITS_TEX_ANY) {
975 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
976 }
977 else {
978 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
979 }
980
981 if (inputsRead & FRAG_BIT_COL0) {
982 intel->coloroffset = offset / 4;
983 EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
984 }
985
986 if ((inputsRead & (FRAG_BIT_COL1 | FRAG_BIT_FOGC)) ||
987 i915->vertex_fog != I915_FOG_NONE) {
988
989 if (inputsRead & FRAG_BIT_COL1) {
990 intel->specoffset = offset / 4;
991 EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3);
992 }
993 else
994 EMIT_PAD(3);
995
996 if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE)
997 EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1);
998 else
999 EMIT_PAD(1);
1000 }
1001
1002 /* XXX this was disabled, but enabling this code helped fix the Glean
1003 * tfragprog1 fog tests.
1004 */
1005 #if 1
1006 if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
1007 EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
1008 }
1009 #endif
1010
1011 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1012 if (inputsRead & FRAG_BIT_TEX(i)) {
1013 int sz = VB->TexCoordPtr[i]->size;
1014
1015 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1016 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
1017
1018 EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
1019 }
1020 else if (i == p->wpos_tex) {
1021
1022 /* If WPOS is required, duplicate the XYZ position data in an
1023 * unused texture coordinate:
1024 */
1025 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1026 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
1027
1028 intel->wpos_offset = offset;
1029 intel->wpos_size = 3 * sizeof(GLuint);
1030
1031 EMIT_PAD(intel->wpos_size);
1032 }
1033 }
1034
1035 if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
1036 s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
1037 int k;
1038
1039 I915_STATECHANGE(i915, I915_UPLOAD_CTX);
1040
1041 /* Must do this *after* statechange, so as not to affect
1042 * buffered vertices reliant on the old state:
1043 */
1044 intel->vertex_size = _tnl_install_attrs(&intel->ctx,
1045 intel->vertex_attrs,
1046 intel->vertex_attr_count,
1047 intel->ViewportMatrix.m, 0);
1048
1049 intel->vertex_size >>= 2;
1050
1051 i915->state.Ctx[I915_CTXREG_LIS2] = s2;
1052 i915->state.Ctx[I915_CTXREG_LIS4] = s4;
1053
1054 k = intel->vtbl.check_vertex_size(intel, intel->vertex_size);
1055 assert(k);
1056 }
1057
1058 if (!p->params_uptodate)
1059 track_params(p);
1060
1061 if (!p->on_hardware)
1062 i915_upload_program(i915, p);
1063 }
1064
1065 void
1066 i915InitFragProgFuncs(struct dd_function_table *functions)
1067 {
1068 functions->BindProgram = i915BindProgram;
1069 functions->NewProgram = i915NewProgram;
1070 functions->DeleteProgram = i915DeleteProgram;
1071 functions->IsProgramNative = i915IsProgramNative;
1072 functions->ProgramStringNotify = i915ProgramStringNotify;
1073 }