4e1df738214983b109052766a8914acc9671a1de
[mesa.git] / src / mesa / drivers / dri / i915 / i915_fragprog.c
1 /**************************************************************************
2 *
3 * Copyright 2003 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "main/glheader.h"
29 #include "main/macros.h"
30 #include "main/enums.h"
31
32 #include "program/prog_instruction.h"
33 #include "program/prog_parameter.h"
34 #include "program/program.h"
35 #include "program/programopt.h"
36 #include "program/prog_print.h"
37
38 #include "tnl/tnl.h"
39 #include "tnl/t_context.h"
40
41 #include "intel_batchbuffer.h"
42
43 #include "i915_reg.h"
44 #include "i915_context.h"
45 #include "i915_program.h"
46
47 static const GLfloat sin_quad_constants[2][4] = {
48 {
49 2.0,
50 -1.0,
51 .5,
52 .75
53 },
54 {
55 4.0,
56 -4.0,
57 1.0 / (2.0 * M_PI),
58 .2225
59 }
60 };
61
62 static const GLfloat sin_constants[4] = { 1.0,
63 -1.0 / (3 * 2 * 1),
64 1.0 / (5 * 4 * 3 * 2 * 1),
65 -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
66 };
67
68 /* 1, -1/2!, 1/4!, -1/6! */
69 static const GLfloat cos_constants[4] = { 1.0,
70 -1.0 / (2 * 1),
71 1.0 / (4 * 3 * 2 * 1),
72 -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
73 };
74
/* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR}
 *
 * Bit 7 of each entry distinguishes a TEXn source (clear) from a VARn
 * source (set); the low bits carry the index within that group.
 */
#define TEXCOORD_TEX 0x00
#define TEXCOORD_VAR 0x80
78
79 static unsigned
80 get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord)
81 {
82 for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
83 if (p->texcoord_mapping[i] == texcoord)
84 return i;
85 }
86
87 /* blah */
88 return p->ctx->Const.MaxTextureCoordUnits - 1;
89 }
90
91 /**
92 * Retrieve a ureg for the given source register. Will emit
93 * constants, apply swizzling and negation as needed.
94 */
95 static GLuint
96 src_vector(struct i915_fragment_program *p,
97 const struct prog_src_register *source,
98 const struct gl_fragment_program *program)
99 {
100 GLuint src;
101 unsigned unit;
102
103 switch (source->File) {
104
105 /* Registers:
106 */
107 case PROGRAM_TEMPORARY:
108 if (source->Index >= I915_MAX_TEMPORARY) {
109 i915_program_error(p, "Exceeded max temporary reg: %d/%d",
110 source->Index, I915_MAX_TEMPORARY);
111 return 0;
112 }
113 src = UREG(REG_TYPE_R, source->Index);
114 break;
115 case PROGRAM_INPUT:
116 switch (source->Index) {
117 case VARYING_SLOT_POS:
118 src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
119 break;
120 case VARYING_SLOT_COL0:
121 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
122 break;
123 case VARYING_SLOT_COL1:
124 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
125 src = swizzle(src, X, Y, Z, ONE);
126 break;
127 case VARYING_SLOT_FOGC:
128 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
129 src = swizzle(src, W, ZERO, ZERO, ONE);
130 break;
131 case VARYING_SLOT_TEX0:
132 case VARYING_SLOT_TEX1:
133 case VARYING_SLOT_TEX2:
134 case VARYING_SLOT_TEX3:
135 case VARYING_SLOT_TEX4:
136 case VARYING_SLOT_TEX5:
137 case VARYING_SLOT_TEX6:
138 case VARYING_SLOT_TEX7:
139 unit = get_texcoord_mapping(p, (source->Index -
140 VARYING_SLOT_TEX0) | TEXCOORD_TEX);
141 src = i915_emit_decl(p, REG_TYPE_T,
142 T_TEX0 + unit,
143 D0_CHANNEL_ALL);
144 break;
145
146 case VARYING_SLOT_VAR0:
147 case VARYING_SLOT_VAR0 + 1:
148 case VARYING_SLOT_VAR0 + 2:
149 case VARYING_SLOT_VAR0 + 3:
150 case VARYING_SLOT_VAR0 + 4:
151 case VARYING_SLOT_VAR0 + 5:
152 case VARYING_SLOT_VAR0 + 6:
153 case VARYING_SLOT_VAR0 + 7:
154 unit = get_texcoord_mapping(p, (source->Index -
155 VARYING_SLOT_VAR0) | TEXCOORD_VAR);
156 src = i915_emit_decl(p, REG_TYPE_T,
157 T_TEX0 + unit,
158 D0_CHANNEL_ALL);
159 break;
160
161 default:
162 i915_program_error(p, "Bad source->Index: %d", source->Index);
163 return 0;
164 }
165 break;
166
167 case PROGRAM_OUTPUT:
168 switch (source->Index) {
169 case FRAG_RESULT_COLOR:
170 case FRAG_RESULT_DATA0:
171 src = UREG(REG_TYPE_OC, 0);
172 break;
173 case FRAG_RESULT_DEPTH:
174 src = UREG(REG_TYPE_OD, 0);
175 break;
176 default:
177 i915_program_error(p, "Bad source->Index: %d", source->Index);
178 return 0;
179 }
180 break;
181
182 /* Various paramters and env values. All emitted to
183 * hardware as program constants.
184 */
185 case PROGRAM_CONSTANT:
186 case PROGRAM_STATE_VAR:
187 case PROGRAM_UNIFORM:
188 src = i915_emit_param4fv(p,
189 &program->Base.Parameters->ParameterValues[source->Index][0].f);
190 break;
191
192 default:
193 i915_program_error(p, "Bad source->File: %d", source->File);
194 return 0;
195 }
196
197 src = swizzle(src,
198 GET_SWZ(source->Swizzle, 0),
199 GET_SWZ(source->Swizzle, 1),
200 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));
201
202 if (source->Negate)
203 src = negate(src,
204 GET_BIT(source->Negate, 0),
205 GET_BIT(source->Negate, 1),
206 GET_BIT(source->Negate, 2),
207 GET_BIT(source->Negate, 3));
208
209 return src;
210 }
211
212
213 static GLuint
214 get_result_vector(struct i915_fragment_program *p,
215 const struct prog_instruction *inst)
216 {
217 switch (inst->DstReg.File) {
218 case PROGRAM_OUTPUT:
219 switch (inst->DstReg.Index) {
220 case FRAG_RESULT_COLOR:
221 case FRAG_RESULT_DATA0:
222 return UREG(REG_TYPE_OC, 0);
223 case FRAG_RESULT_DEPTH:
224 p->depth_written = 1;
225 return UREG(REG_TYPE_OD, 0);
226 default:
227 i915_program_error(p, "Bad inst->DstReg.Index: %d",
228 inst->DstReg.Index);
229 return 0;
230 }
231 case PROGRAM_TEMPORARY:
232 return UREG(REG_TYPE_R, inst->DstReg.Index);
233 default:
234 i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File);
235 return 0;
236 }
237 }
238
239 static GLuint
240 get_result_flags(const struct prog_instruction *inst)
241 {
242 GLuint flags = 0;
243
244 if (inst->Saturate)
245 flags |= A0_DEST_SATURATE;
246 if (inst->DstReg.WriteMask & WRITEMASK_X)
247 flags |= A0_DEST_CHANNEL_X;
248 if (inst->DstReg.WriteMask & WRITEMASK_Y)
249 flags |= A0_DEST_CHANNEL_Y;
250 if (inst->DstReg.WriteMask & WRITEMASK_Z)
251 flags |= A0_DEST_CHANNEL_Z;
252 if (inst->DstReg.WriteMask & WRITEMASK_W)
253 flags |= A0_DEST_CHANNEL_W;
254
255 return flags;
256 }
257
258 static GLuint
259 translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
260 {
261 switch (bit) {
262 case TEXTURE_1D_INDEX:
263 return D0_SAMPLE_TYPE_2D;
264 case TEXTURE_2D_INDEX:
265 return D0_SAMPLE_TYPE_2D;
266 case TEXTURE_RECT_INDEX:
267 return D0_SAMPLE_TYPE_2D;
268 case TEXTURE_3D_INDEX:
269 return D0_SAMPLE_TYPE_VOLUME;
270 case TEXTURE_CUBE_INDEX:
271 return D0_SAMPLE_TYPE_CUBE;
272 default:
273 i915_program_error(p, "TexSrcBit: %d", bit);
274 return 0;
275 }
276 }
277
/*
 * Emit a texture instruction (OP is one of the T0_* opcodes) for the
 * current Mesa instruction.
 *
 * Must be expanded in a scope that provides `p` (the i915 fragment
 * program being built) and `inst` (the instruction being translated).
 * The sampler is declared from the instruction's sampler unit and
 * texture target, and the coordinate comes from source operand 0.
 */
#define EMIT_TEX( OP )                                                  \
do {                                                                    \
   GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget );      \
   const struct gl_fragment_program *program = &p->FragProg;            \
   GLuint unit = program->Base.SamplerUnits[inst->TexSrcUnit];          \
   GLuint sampler = i915_emit_decl(p, REG_TYPE_S,                       \
                                   unit, dim);                          \
   GLuint coord = src_vector( p, &inst->SrcReg[0], program);            \
   /* Texel lookup */                                                   \
                                                                        \
   i915_emit_texld( p, get_live_regs(p, inst),                          \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ),                           \
                    sampler,                                            \
                    coord,                                              \
                    (OP));                                              \
} while (0)

/*
 * Emit an arithmetic instruction OP taking the first N (0..3) source
 * operands of the current instruction; unused source slots are passed
 * as 0.
 *
 * Must be expanded in a scope that provides `p`, `inst` and `program`.
 * The macro arguments are parenthesized so that expression arguments
 * cannot be re-associated by the surrounding operators.
 */
#define EMIT_ARITH( OP, N )                                             \
do {                                                                    \
   i915_emit_arith( p,                                                  \
                    (OP),                                               \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ), 0,                        \
                    ((N) < 1) ? 0 : src_vector( p, &inst->SrcReg[0], program), \
                    ((N) < 2) ? 0 : src_vector( p, &inst->SrcReg[1], program), \
                    ((N) < 3) ? 0 : src_vector( p, &inst->SrcReg[2], program)); \
} while (0)

/* Fixed-arity shorthands for EMIT_ARITH. */
#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
310
311 /*
312 * TODO: consider moving this into core
313 */
314 static bool calc_live_regs( struct i915_fragment_program *p )
315 {
316 const struct gl_fragment_program *program = &p->FragProg;
317 GLuint regsUsed = ~((1 << I915_MAX_TEMPORARY) - 1);
318 uint8_t live_components[I915_MAX_TEMPORARY] = { 0, };
319 GLint i;
320
321 for (i = program->Base.NumInstructions - 1; i >= 0; i--) {
322 struct prog_instruction *inst = &program->Base.Instructions[i];
323 int opArgs = _mesa_num_inst_src_regs(inst->Opcode);
324 int a;
325
326 /* Register is written to: unmark as live for this and preceeding ops */
327 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
328 if (inst->DstReg.Index >= I915_MAX_TEMPORARY)
329 return false;
330
331 live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
332 if (live_components[inst->DstReg.Index] == 0)
333 regsUsed &= ~(1 << inst->DstReg.Index);
334 }
335
336 for (a = 0; a < opArgs; a++) {
337 /* Register is read from: mark as live for this and preceeding ops */
338 if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
339 unsigned c;
340
341 if (inst->SrcReg[a].Index >= I915_MAX_TEMPORARY)
342 return false;
343
344 regsUsed |= 1 << inst->SrcReg[a].Index;
345
346 for (c = 0; c < 4; c++) {
347 const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c);
348
349 if (field <= SWIZZLE_W)
350 live_components[inst->SrcReg[a].Index] |= (1U << field);
351 }
352 }
353 }
354
355 p->usedRegs[i] = regsUsed;
356 }
357
358 return true;
359 }
360
361 static GLuint get_live_regs( struct i915_fragment_program *p,
362 const struct prog_instruction *inst )
363 {
364 const struct gl_fragment_program *program = &p->FragProg;
365 GLuint nr = inst - program->Base.Instructions;
366
367 return p->usedRegs[nr];
368 }
369
370
371 /* Possible concerns:
372 *
373 * SIN, COS -- could use another taylor step?
374 * LIT -- results seem a little different to sw mesa
375 * LOG -- different to mesa on negative numbers, but this is conformant.
376 *
377 * Parse failures -- Mesa doesn't currently give a good indication
378 * internally whether a particular program string parsed or not. This
379 * can lead to confusion -- hopefully we cope with it ok now.
380 *
381 */
382 static void
383 upload_program(struct i915_fragment_program *p)
384 {
385 const struct gl_fragment_program *program = &p->FragProg;
386 const struct prog_instruction *inst = program->Base.Instructions;
387
388 if (INTEL_DEBUG & DEBUG_WM)
389 _mesa_print_program(&program->Base);
390
391 /* Is this a parse-failed program? Ensure a valid program is
392 * loaded, as the flagging of an error isn't sufficient to stop
393 * this being uploaded to hardware.
394 */
395 if (inst[0].Opcode == OPCODE_END) {
396 GLuint tmp = i915_get_utemp(p);
397 i915_emit_arith(p,
398 A0_MOV,
399 UREG(REG_TYPE_OC, 0),
400 A0_DEST_CHANNEL_ALL, 0,
401 swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
402 return;
403 }
404
405 if (program->Base.NumInstructions > I915_MAX_INSN) {
406 i915_program_error(p, "Exceeded max instructions (%d out of %d)",
407 program->Base.NumInstructions, I915_MAX_INSN);
408 return;
409 }
410
411 /* Not always needed:
412 */
413 if (!calc_live_regs(p)) {
414 i915_program_error(p, "Could not allocate registers");
415 return;
416 }
417
418 while (1) {
419 GLuint src0, src1, src2, flags;
420 GLuint tmp = 0, dst, consts0 = 0, consts1 = 0;
421
422 switch (inst->Opcode) {
423 case OPCODE_ABS:
424 src0 = src_vector(p, &inst->SrcReg[0], program);
425 i915_emit_arith(p,
426 A0_MAX,
427 get_result_vector(p, inst),
428 get_result_flags(inst), 0,
429 src0, negate(src0, 1, 1, 1, 1), 0);
430 break;
431
432 case OPCODE_ADD:
433 EMIT_2ARG_ARITH(A0_ADD);
434 break;
435
436 case OPCODE_CMP:
437 src0 = src_vector(p, &inst->SrcReg[0], program);
438 src1 = src_vector(p, &inst->SrcReg[1], program);
439 src2 = src_vector(p, &inst->SrcReg[2], program);
440 i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */
441 break;
442
443 case OPCODE_COS:
444 src0 = src_vector(p, &inst->SrcReg[0], program);
445 tmp = i915_get_utemp(p);
446 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
447 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
448
449 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
450 i915_emit_arith(p,
451 A0_MAD,
452 tmp, A0_DEST_CHANNEL_X, 0,
453 src0,
454 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
455 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */
456
457 i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
458
459 i915_emit_arith(p,
460 A0_MAD,
461 tmp, A0_DEST_CHANNEL_X, 0,
462 tmp,
463 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
464 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
465
466 /* Compute COS with the same calculation used for SIN, but a
467 * different source range has been mapped to [-1,1] this time.
468 */
469
470 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
471 i915_emit_arith(p,
472 A0_MAX,
473 tmp, A0_DEST_CHANNEL_Y, 0,
474 swizzle(tmp, ZERO, X, ZERO, ZERO),
475 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
476 0);
477
478 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
479 i915_emit_arith(p,
480 A0_MUL,
481 tmp, A0_DEST_CHANNEL_Y, 0,
482 swizzle(tmp, ZERO, X, ZERO, ZERO),
483 tmp,
484 0);
485
486 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
487 i915_emit_arith(p,
488 A0_DP3,
489 tmp, A0_DEST_CHANNEL_X, 0,
490 tmp,
491 swizzle(consts1, X, Y, ZERO, ZERO),
492 0);
493
494 /* tmp.x now contains a first approximation (y). Now, weight it
495 * against tmp.y**2 to get closer.
496 */
497 i915_emit_arith(p,
498 A0_MAX,
499 tmp, A0_DEST_CHANNEL_Y, 0,
500 swizzle(tmp, ZERO, X, ZERO, ZERO),
501 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
502 0);
503
504 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
505 i915_emit_arith(p,
506 A0_MAD,
507 tmp, A0_DEST_CHANNEL_Y, 0,
508 swizzle(tmp, ZERO, X, ZERO, ZERO),
509 swizzle(tmp, ZERO, Y, ZERO, ZERO),
510 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
511
512 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
513 i915_emit_arith(p,
514 A0_MAD,
515 get_result_vector(p, inst),
516 get_result_flags(inst), 0,
517 swizzle(consts1, W, W, W, W),
518 swizzle(tmp, Y, Y, Y, Y),
519 swizzle(tmp, X, X, X, X));
520 break;
521
522 case OPCODE_DP2:
523 src0 = src_vector(p, &inst->SrcReg[0], program);
524 src1 = src_vector(p, &inst->SrcReg[1], program);
525 i915_emit_arith(p,
526 A0_DP3,
527 get_result_vector(p, inst),
528 get_result_flags(inst), 0,
529 swizzle(src0, X, Y, ZERO, ZERO),
530 swizzle(src1, X, Y, ZERO, ZERO),
531 0);
532 break;
533
534 case OPCODE_DP3:
535 EMIT_2ARG_ARITH(A0_DP3);
536 break;
537
538 case OPCODE_DP4:
539 EMIT_2ARG_ARITH(A0_DP4);
540 break;
541
542 case OPCODE_DPH:
543 src0 = src_vector(p, &inst->SrcReg[0], program);
544 src1 = src_vector(p, &inst->SrcReg[1], program);
545
546 i915_emit_arith(p,
547 A0_DP4,
548 get_result_vector(p, inst),
549 get_result_flags(inst), 0,
550 swizzle(src0, X, Y, Z, ONE), src1, 0);
551 break;
552
553 case OPCODE_DST:
554 src0 = src_vector(p, &inst->SrcReg[0], program);
555 src1 = src_vector(p, &inst->SrcReg[1], program);
556
557 /* result[0] = 1 * 1;
558 * result[1] = a[1] * b[1];
559 * result[2] = a[2] * 1;
560 * result[3] = 1 * b[3];
561 */
562 i915_emit_arith(p,
563 A0_MUL,
564 get_result_vector(p, inst),
565 get_result_flags(inst), 0,
566 swizzle(src0, ONE, Y, Z, ONE),
567 swizzle(src1, ONE, Y, ONE, W), 0);
568 break;
569
570 case OPCODE_EX2:
571 src0 = src_vector(p, &inst->SrcReg[0], program);
572
573 i915_emit_arith(p,
574 A0_EXP,
575 get_result_vector(p, inst),
576 get_result_flags(inst), 0,
577 swizzle(src0, X, X, X, X), 0, 0);
578 break;
579
580 case OPCODE_FLR:
581 EMIT_1ARG_ARITH(A0_FLR);
582 break;
583
584 case OPCODE_TRUNC:
585 EMIT_1ARG_ARITH(A0_TRC);
586 break;
587
588 case OPCODE_FRC:
589 EMIT_1ARG_ARITH(A0_FRC);
590 break;
591
592 case OPCODE_KIL:
593 src0 = src_vector(p, &inst->SrcReg[0], program);
594 tmp = i915_get_utemp(p);
595
596 i915_emit_texld(p, get_live_regs(p, inst),
597 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
598 0, src0, T0_TEXKILL);
599 break;
600
601 case OPCODE_LG2:
602 src0 = src_vector(p, &inst->SrcReg[0], program);
603
604 i915_emit_arith(p,
605 A0_LOG,
606 get_result_vector(p, inst),
607 get_result_flags(inst), 0,
608 swizzle(src0, X, X, X, X), 0, 0);
609 break;
610
611 case OPCODE_LIT:
612 src0 = src_vector(p, &inst->SrcReg[0], program);
613 tmp = i915_get_utemp(p);
614
615 /* tmp = max( a.xyzw, a.00zw )
616 * XXX: Clamp tmp.w to -128..128
617 * tmp.y = log(tmp.y)
618 * tmp.y = tmp.w * tmp.y
619 * tmp.y = exp(tmp.y)
620 * result = cmp (a.11-x1, a.1x01, a.1xy1 )
621 */
622 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
623 src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
624
625 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
626 swizzle(tmp, Y, Y, Y, Y), 0, 0);
627
628 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
629 swizzle(tmp, ZERO, Y, ZERO, ZERO),
630 swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
631
632 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
633 swizzle(tmp, Y, Y, Y, Y), 0, 0);
634
635 i915_emit_arith(p, A0_CMP,
636 get_result_vector(p, inst),
637 get_result_flags(inst), 0,
638 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
639 swizzle(tmp, ONE, X, ZERO, ONE),
640 swizzle(tmp, ONE, X, Y, ONE));
641
642 break;
643
644 case OPCODE_LRP:
645 src0 = src_vector(p, &inst->SrcReg[0], program);
646 src1 = src_vector(p, &inst->SrcReg[1], program);
647 src2 = src_vector(p, &inst->SrcReg[2], program);
648 flags = get_result_flags(inst);
649 tmp = i915_get_utemp(p);
650
651 /* b*a + c*(1-a)
652 *
653 * b*a + c - ca
654 *
655 * tmp = b*a + c,
656 * result = (-c)*a + tmp
657 */
658 i915_emit_arith(p, A0_MAD, tmp,
659 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
660
661 i915_emit_arith(p, A0_MAD,
662 get_result_vector(p, inst),
663 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
664 break;
665
666 case OPCODE_MAD:
667 EMIT_3ARG_ARITH(A0_MAD);
668 break;
669
670 case OPCODE_MAX:
671 EMIT_2ARG_ARITH(A0_MAX);
672 break;
673
674 case OPCODE_MIN:
675 EMIT_2ARG_ARITH(A0_MIN);
676 break;
677
678 case OPCODE_MOV:
679 EMIT_1ARG_ARITH(A0_MOV);
680 break;
681
682 case OPCODE_MUL:
683 EMIT_2ARG_ARITH(A0_MUL);
684 break;
685
686 case OPCODE_POW:
687 src0 = src_vector(p, &inst->SrcReg[0], program);
688 src1 = src_vector(p, &inst->SrcReg[1], program);
689 tmp = i915_get_utemp(p);
690 flags = get_result_flags(inst);
691
692 /* XXX: masking on intermediate values, here and elsewhere.
693 */
694 i915_emit_arith(p,
695 A0_LOG,
696 tmp, A0_DEST_CHANNEL_X, 0,
697 swizzle(src0, X, X, X, X), 0, 0);
698
699 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
700
701
702 i915_emit_arith(p,
703 A0_EXP,
704 get_result_vector(p, inst),
705 flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
706
707 break;
708
709 case OPCODE_RCP:
710 src0 = src_vector(p, &inst->SrcReg[0], program);
711
712 i915_emit_arith(p,
713 A0_RCP,
714 get_result_vector(p, inst),
715 get_result_flags(inst), 0,
716 swizzle(src0, X, X, X, X), 0, 0);
717 break;
718
719 case OPCODE_RSQ:
720
721 src0 = src_vector(p, &inst->SrcReg[0], program);
722
723 i915_emit_arith(p,
724 A0_RSQ,
725 get_result_vector(p, inst),
726 get_result_flags(inst), 0,
727 swizzle(src0, X, X, X, X), 0, 0);
728 break;
729
730 case OPCODE_SCS:
731 src0 = src_vector(p, &inst->SrcReg[0], program);
732 tmp = i915_get_utemp(p);
733
734 /*
735 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
736 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
737 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
738 * scs.x = DP4 t1, sin_constants
739 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
740 * scs.y = DP4 t1, cos_constants
741 */
742 i915_emit_arith(p,
743 A0_MUL,
744 tmp, A0_DEST_CHANNEL_XY, 0,
745 swizzle(src0, X, X, ONE, ONE),
746 swizzle(src0, X, ONE, ONE, ONE), 0);
747
748 i915_emit_arith(p,
749 A0_MUL,
750 tmp, A0_DEST_CHANNEL_ALL, 0,
751 swizzle(tmp, X, Y, X, Y),
752 swizzle(tmp, X, X, ONE, ONE), 0);
753
754 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
755 GLuint tmp1;
756
757 if (inst->DstReg.WriteMask & WRITEMASK_X)
758 tmp1 = i915_get_utemp(p);
759 else
760 tmp1 = tmp;
761
762 i915_emit_arith(p,
763 A0_MUL,
764 tmp1, A0_DEST_CHANNEL_ALL, 0,
765 swizzle(tmp, X, Y, Y, W),
766 swizzle(tmp, X, Z, ONE, ONE), 0);
767
768 i915_emit_arith(p,
769 A0_DP4,
770 get_result_vector(p, inst),
771 A0_DEST_CHANNEL_Y, 0,
772 swizzle(tmp1, W, Z, Y, X),
773 i915_emit_const4fv(p, sin_constants), 0);
774 }
775
776 if (inst->DstReg.WriteMask & WRITEMASK_X) {
777 i915_emit_arith(p,
778 A0_MUL,
779 tmp, A0_DEST_CHANNEL_XYZ, 0,
780 swizzle(tmp, X, X, Z, ONE),
781 swizzle(tmp, Z, ONE, ONE, ONE), 0);
782
783 i915_emit_arith(p,
784 A0_DP4,
785 get_result_vector(p, inst),
786 A0_DEST_CHANNEL_X, 0,
787 swizzle(tmp, ONE, Z, Y, X),
788 i915_emit_const4fv(p, cos_constants), 0);
789 }
790 break;
791
792 case OPCODE_SIN:
793 src0 = src_vector(p, &inst->SrcReg[0], program);
794 tmp = i915_get_utemp(p);
795 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
796 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
797
798 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
799 i915_emit_arith(p,
800 A0_MAD,
801 tmp, A0_DEST_CHANNEL_X, 0,
802 src0,
803 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
804 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */
805
806 i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
807
808 i915_emit_arith(p,
809 A0_MAD,
810 tmp, A0_DEST_CHANNEL_X, 0,
811 tmp,
812 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
813 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
814
815 /* Compute sin using a quadratic and quartic. It gives continuity
816 * that repeating the Taylor series lacks every 2*pi, and has
817 * reduced error.
818 *
819 * The idea was described at:
820 * http://www.devmaster.net/forums/showthread.php?t=5784
821 */
822
823 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
824 i915_emit_arith(p,
825 A0_MAX,
826 tmp, A0_DEST_CHANNEL_Y, 0,
827 swizzle(tmp, ZERO, X, ZERO, ZERO),
828 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
829 0);
830
831 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
832 i915_emit_arith(p,
833 A0_MUL,
834 tmp, A0_DEST_CHANNEL_Y, 0,
835 swizzle(tmp, ZERO, X, ZERO, ZERO),
836 tmp,
837 0);
838
839 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
840 i915_emit_arith(p,
841 A0_DP3,
842 tmp, A0_DEST_CHANNEL_X, 0,
843 tmp,
844 swizzle(consts1, X, Y, ZERO, ZERO),
845 0);
846
847 /* tmp.x now contains a first approximation (y). Now, weight it
848 * against tmp.y**2 to get closer.
849 */
850 i915_emit_arith(p,
851 A0_MAX,
852 tmp, A0_DEST_CHANNEL_Y, 0,
853 swizzle(tmp, ZERO, X, ZERO, ZERO),
854 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
855 0);
856
857 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
858 i915_emit_arith(p,
859 A0_MAD,
860 tmp, A0_DEST_CHANNEL_Y, 0,
861 swizzle(tmp, ZERO, X, ZERO, ZERO),
862 swizzle(tmp, ZERO, Y, ZERO, ZERO),
863 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
864
865 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
866 i915_emit_arith(p,
867 A0_MAD,
868 get_result_vector(p, inst),
869 get_result_flags(inst), 0,
870 swizzle(consts1, W, W, W, W),
871 swizzle(tmp, Y, Y, Y, Y),
872 swizzle(tmp, X, X, X, X));
873
874 break;
875
876 case OPCODE_SGE:
877 EMIT_2ARG_ARITH(A0_SGE);
878 break;
879
880 case OPCODE_SLT:
881 EMIT_2ARG_ARITH(A0_SLT);
882 break;
883
884 case OPCODE_SSG:
885 dst = get_result_vector(p, inst);
886 flags = get_result_flags(inst);
887 src0 = src_vector(p, &inst->SrcReg[0], program);
888 tmp = i915_get_utemp(p);
889
890 /* tmp = (src < 0.0) */
891 i915_emit_arith(p,
892 A0_SLT,
893 tmp,
894 flags, 0,
895 src0,
896 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
897 0);
898
899 /* dst = (0.0 < src) */
900 i915_emit_arith(p,
901 A0_SLT,
902 dst,
903 flags, 0,
904 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
905 src0,
906 0);
907
908 /* dst = (src > 0.0) - (src < 0.0) */
909 i915_emit_arith(p,
910 A0_ADD,
911 dst,
912 flags, 0,
913 dst,
914 negate(tmp, 1, 1, 1, 1),
915 0);
916
917 break;
918
919 case OPCODE_SUB:
920 src0 = src_vector(p, &inst->SrcReg[0], program);
921 src1 = src_vector(p, &inst->SrcReg[1], program);
922
923 i915_emit_arith(p,
924 A0_ADD,
925 get_result_vector(p, inst),
926 get_result_flags(inst), 0,
927 src0, negate(src1, 1, 1, 1, 1), 0);
928 break;
929
930 case OPCODE_SWZ:
931 EMIT_1ARG_ARITH(A0_MOV); /* extended swizzle handled natively */
932 break;
933
934 case OPCODE_TEX:
935 EMIT_TEX(T0_TEXLD);
936 break;
937
938 case OPCODE_TXB:
939 EMIT_TEX(T0_TEXLDB);
940 break;
941
942 case OPCODE_TXP:
943 EMIT_TEX(T0_TEXLDP);
944 break;
945
946 case OPCODE_XPD:
947 /* Cross product:
948 * result.x = src0.y * src1.z - src0.z * src1.y;
949 * result.y = src0.z * src1.x - src0.x * src1.z;
950 * result.z = src0.x * src1.y - src0.y * src1.x;
951 * result.w = undef;
952 */
953 src0 = src_vector(p, &inst->SrcReg[0], program);
954 src1 = src_vector(p, &inst->SrcReg[1], program);
955 tmp = i915_get_utemp(p);
956
957 i915_emit_arith(p,
958 A0_MUL,
959 tmp, A0_DEST_CHANNEL_ALL, 0,
960 swizzle(src0, Z, X, Y, ONE),
961 swizzle(src1, Y, Z, X, ONE), 0);
962
963 i915_emit_arith(p,
964 A0_MAD,
965 get_result_vector(p, inst),
966 get_result_flags(inst), 0,
967 swizzle(src0, Y, Z, X, ONE),
968 swizzle(src1, Z, X, Y, ONE),
969 negate(tmp, 1, 1, 1, 0));
970 break;
971
972 case OPCODE_END:
973 return;
974
975 case OPCODE_BGNLOOP:
976 case OPCODE_BGNSUB:
977 case OPCODE_BRK:
978 case OPCODE_CAL:
979 case OPCODE_CONT:
980 case OPCODE_DDX:
981 case OPCODE_DDY:
982 case OPCODE_ELSE:
983 case OPCODE_ENDIF:
984 case OPCODE_ENDLOOP:
985 case OPCODE_ENDSUB:
986 case OPCODE_IF:
987 case OPCODE_RET:
988 p->error = 1;
989 i915_program_error(p, "Unsupported opcode: %s",
990 _mesa_opcode_string(inst->Opcode));
991 return;
992
993 case OPCODE_EXP:
994 case OPCODE_LOG:
995 /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in
996 * prog_instruction.h, but apparently GLSL doesn't ever emit them.
997 * Instead, it translates to EX2 or LG2.
998 */
999 case OPCODE_TXD:
1000 case OPCODE_TXL:
1001 /* These opcodes are claimed by GLSL in prog_instruction.h, but
1002 * only NV_vp/fp appears to emit them.
1003 */
1004 default:
1005 i915_program_error(p, "bad opcode: %s",
1006 _mesa_opcode_string(inst->Opcode));
1007 return;
1008 }
1009
1010 inst++;
1011 i915_release_utemps(p);
1012 }
1013 }
1014
1015 /* Rather than trying to intercept and jiggle depth writes during
1016 * emit, just move the value into its correct position at the end of
1017 * the program:
1018 */
1019 static void
1020 fixup_depth_write(struct i915_fragment_program *p)
1021 {
1022 if (p->depth_written) {
1023 GLuint depth = UREG(REG_TYPE_OD, 0);
1024
1025 i915_emit_arith(p,
1026 A0_MOV,
1027 depth, A0_DEST_CHANNEL_W, 0,
1028 swizzle(depth, X, Y, Z, Z), 0, 0);
1029 }
1030 }
1031
1032 static void
1033 check_texcoord_mapping(struct i915_fragment_program *p)
1034 {
1035 GLbitfield64 inputs = p->FragProg.Base.InputsRead;
1036 unsigned unit = 0;
1037
1038 for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1039 if (inputs & VARYING_BIT_TEX(i)) {
1040 if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
1041 unit++;
1042 break;
1043 }
1044 p->texcoord_mapping[unit++] = i | TEXCOORD_TEX;
1045 }
1046 if (inputs & VARYING_BIT_VAR(i)) {
1047 if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
1048 unit++;
1049 break;
1050 }
1051 p->texcoord_mapping[unit++] = i | TEXCOORD_VAR;
1052 }
1053 }
1054
1055 if (unit > p->ctx->Const.MaxTextureCoordUnits)
1056 i915_program_error(p, "Too many texcoord units");
1057 }
1058
1059 static void
1060 check_wpos(struct i915_fragment_program *p)
1061 {
1062 GLbitfield64 inputs = p->FragProg.Base.InputsRead;
1063 GLint i;
1064 unsigned unit = 0;
1065
1066 p->wpos_tex = -1;
1067
1068 if ((inputs & VARYING_BIT_POS) == 0)
1069 return;
1070
1071 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1072 unit += !!(inputs & VARYING_BIT_TEX(i));
1073 unit += !!(inputs & VARYING_BIT_VAR(i));
1074 }
1075
1076 if (unit < p->ctx->Const.MaxTextureCoordUnits)
1077 p->wpos_tex = unit;
1078 else
1079 i915_program_error(p, "No free texcoord for wpos value");
1080 }
1081
1082
1083 static void
1084 translate_program(struct i915_fragment_program *p)
1085 {
1086 struct i915_context *i915 = I915_CONTEXT(p->ctx);
1087
1088 if (INTEL_DEBUG & DEBUG_WM) {
1089 printf("fp:\n");
1090 _mesa_print_program(&p->FragProg.Base);
1091 printf("\n");
1092 }
1093
1094 i915_init_program(i915, p);
1095 check_texcoord_mapping(p);
1096 check_wpos(p);
1097 upload_program(p);
1098 fixup_depth_write(p);
1099 i915_fini_program(p);
1100
1101 p->translated = 1;
1102 }
1103
1104
1105 static void
1106 track_params(struct i915_fragment_program *p)
1107 {
1108 GLint i;
1109
1110 if (p->nr_params)
1111 _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
1112
1113 for (i = 0; i < p->nr_params; i++) {
1114 GLint reg = p->param[i].reg;
1115 COPY_4V(p->constant[reg], p->param[i].values);
1116 }
1117
1118 p->params_uptodate = 1;
1119 p->on_hardware = 0; /* overkill */
1120 }
1121
1122
1123 static void
1124 i915BindProgram(struct gl_context * ctx, GLenum target, struct gl_program *prog)
1125 {
1126 if (target == GL_FRAGMENT_PROGRAM_ARB) {
1127 struct i915_context *i915 = I915_CONTEXT(ctx);
1128 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1129
1130 if (i915->current_program == p)
1131 return;
1132
1133 if (i915->current_program) {
1134 i915->current_program->on_hardware = 0;
1135 i915->current_program->params_uptodate = 0;
1136 }
1137
1138 i915->current_program = p;
1139
1140 assert(p->on_hardware == 0);
1141 assert(p->params_uptodate == 0);
1142
1143 }
1144 }
1145
1146 static struct gl_program *
1147 i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id)
1148 {
1149 switch (target) {
1150 case GL_VERTEX_PROGRAM_ARB: {
1151 struct gl_program *prog = CALLOC_STRUCT(gl_program);
1152 return _mesa_init_gl_program(prog, target, id);
1153 }
1154
1155 case GL_FRAGMENT_PROGRAM_ARB:{
1156 struct i915_fragment_program *prog =
1157 CALLOC_STRUCT(i915_fragment_program);
1158 if (prog) {
1159 i915_init_program(I915_CONTEXT(ctx), prog);
1160
1161 return _mesa_init_gl_program(&prog->FragProg.Base, target, id);
1162 }
1163 else
1164 return NULL;
1165 }
1166
1167 default:
1168 /* Just fallback:
1169 */
1170 return _mesa_new_program(ctx, target, id);
1171 }
1172 }
1173
1174 static void
1175 i915DeleteProgram(struct gl_context * ctx, struct gl_program *prog)
1176 {
1177 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1178 struct i915_context *i915 = I915_CONTEXT(ctx);
1179 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1180
1181 if (i915->current_program == p)
1182 i915->current_program = 0;
1183 }
1184
1185 _mesa_delete_program(ctx, prog);
1186 }
1187
1188
1189 static GLboolean
1190 i915IsProgramNative(struct gl_context * ctx, GLenum target, struct gl_program *prog)
1191 {
1192 if (target == GL_FRAGMENT_PROGRAM_ARB) {
1193 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1194
1195 if (!p->translated)
1196 translate_program(p);
1197
1198 return !p->error;
1199 }
1200 else
1201 return true;
1202 }
1203
1204 static GLboolean
1205 i915ProgramStringNotify(struct gl_context * ctx,
1206 GLenum target, struct gl_program *prog)
1207 {
1208 if (target == GL_FRAGMENT_PROGRAM_ARB) {
1209 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1210 p->translated = 0;
1211 }
1212
1213 (void) _tnl_program_string(ctx, target, prog);
1214
1215 /* XXX check if program is legal, within limits */
1216 return true;
1217 }
1218
1219 static void
1220 i915SamplerUniformChange(struct gl_context *ctx,
1221 GLenum target, struct gl_program *prog)
1222 {
1223 i915ProgramStringNotify(ctx, target, prog);
1224 }
1225
1226 void
1227 i915_update_program(struct gl_context *ctx)
1228 {
1229 struct intel_context *intel = intel_context(ctx);
1230 struct i915_context *i915 = i915_context(&intel->ctx);
1231 struct i915_fragment_program *fp =
1232 (struct i915_fragment_program *) ctx->FragmentProgram._Current;
1233
1234 if (i915->current_program != fp) {
1235 if (i915->current_program) {
1236 i915->current_program->on_hardware = 0;
1237 i915->current_program->params_uptodate = 0;
1238 }
1239
1240 i915->current_program = fp;
1241 }
1242
1243 if (!fp->translated)
1244 translate_program(fp);
1245
1246 FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error);
1247 }
1248
1249 void
1250 i915ValidateFragmentProgram(struct i915_context *i915)
1251 {
1252 struct gl_context *ctx = &i915->intel.ctx;
1253 struct intel_context *intel = intel_context(ctx);
1254 TNLcontext *tnl = TNL_CONTEXT(ctx);
1255 struct vertex_buffer *VB = &tnl->vb;
1256
1257 struct i915_fragment_program *p =
1258 (struct i915_fragment_program *) ctx->FragmentProgram._Current;
1259
1260 const GLbitfield64 inputsRead = p->FragProg.Base.InputsRead;
1261 GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
1262 GLuint s2 = S2_TEXCOORD_NONE;
1263 int i, offset = 0;
1264
1265 /* Important:
1266 */
1267 VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
1268
1269 if (!p->translated)
1270 translate_program(p);
1271
1272 intel->vertex_attr_count = 0;
1273 intel->wpos_offset = 0;
1274 intel->coloroffset = 0;
1275 intel->specoffset = 0;
1276
1277 if (inputsRead & VARYING_BITS_TEX_ANY || p->wpos_tex != -1) {
1278 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
1279 }
1280 else {
1281 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
1282 }
1283
1284 /* Handle gl_PointSize builtin var here */
1285 if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
1286 EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4);
1287
1288 if (inputsRead & VARYING_BIT_COL0) {
1289 intel->coloroffset = offset / 4;
1290 EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
1291 }
1292
1293 if (inputsRead & VARYING_BIT_COL1) {
1294 intel->specoffset = offset / 4;
1295 EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4);
1296 }
1297
1298 if ((inputsRead & VARYING_BIT_FOGC)) {
1299 EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
1300 }
1301
1302 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1303 if (inputsRead & VARYING_BIT_TEX(i)) {
1304 int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX);
1305 int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
1306
1307 s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
1308 s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
1309
1310 EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
1311 }
1312 if (inputsRead & VARYING_BIT_VAR(i)) {
1313 int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR);
1314 int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
1315
1316 s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
1317 s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
1318
1319 EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
1320 }
1321 if (i == p->wpos_tex) {
1322 int wpos_size = 4 * sizeof(float);
1323 /* If WPOS is required, duplicate the XYZ position data in an
1324 * unused texture coordinate:
1325 */
1326 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1327 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size));
1328
1329 intel->wpos_offset = offset;
1330 EMIT_PAD(wpos_size);
1331 }
1332 }
1333
1334 if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
1335 s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
1336 I915_STATECHANGE(i915, I915_UPLOAD_CTX);
1337
1338 /* Must do this *after* statechange, so as not to affect
1339 * buffered vertices reliant on the old state:
1340 */
1341 intel->vertex_size = _tnl_install_attrs(&intel->ctx,
1342 intel->vertex_attrs,
1343 intel->vertex_attr_count,
1344 intel->ViewportMatrix.m, 0);
1345
1346 assert(intel->prim.current_offset == intel->prim.start_offset);
1347 intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size;
1348 intel->prim.current_offset = intel->prim.start_offset;
1349
1350 intel->vertex_size >>= 2;
1351
1352 i915->state.Ctx[I915_CTXREG_LIS2] = s2;
1353 i915->state.Ctx[I915_CTXREG_LIS4] = s4;
1354
1355 assert(intel->vtbl.check_vertex_size(intel, intel->vertex_size));
1356 }
1357
1358 if (!p->params_uptodate)
1359 track_params(p);
1360
1361 if (!p->on_hardware)
1362 i915_upload_program(i915, p);
1363
1364 if (INTEL_DEBUG & DEBUG_WM) {
1365 printf("i915:\n");
1366 i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
1367 }
1368 }
1369
1370 void
1371 i915InitFragProgFuncs(struct dd_function_table *functions)
1372 {
1373 functions->BindProgram = i915BindProgram;
1374 functions->NewProgram = i915NewProgram;
1375 functions->DeleteProgram = i915DeleteProgram;
1376 functions->IsProgramNative = i915IsProgramNative;
1377 functions->ProgramStringNotify = i915ProgramStringNotify;
1378 functions->SamplerUniformChange = i915SamplerUniformChange;
1379 }