i915: Use COPY_DWORDS for points
[mesa.git] / src / mesa / drivers / dri / i915 / i915_fragprog.c
1 /**************************************************************************
2 *
3 * Copyright 2003 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "main/glheader.h"
29 #include "main/macros.h"
30 #include "main/enums.h"
31
32 #include "program/prog_instruction.h"
33 #include "program/prog_parameter.h"
34 #include "program/program.h"
35 #include "program/programopt.h"
36 #include "program/prog_print.h"
37
38 #include "tnl/tnl.h"
39 #include "tnl/t_context.h"
40
41 #include "intel_batchbuffer.h"
42
43 #include "i915_reg.h"
44 #include "i915_context.h"
45 #include "i915_program.h"
46
47 static const GLfloat sin_quad_constants[2][4] = {
48 {
49 2.0,
50 -1.0,
51 .5,
52 .75
53 },
54 {
55 4.0,
56 -4.0,
57 1.0 / (2.0 * M_PI),
58 .2225
59 }
60 };
61
62 static const GLfloat sin_constants[4] = { 1.0,
63 -1.0 / (3 * 2 * 1),
64 1.0 / (5 * 4 * 3 * 2 * 1),
65 -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
66 };
67
68 /* 1, -1/2!, 1/4!, -1/6! */
69 static const GLfloat cos_constants[4] = { 1.0,
70 -1.0 / (2 * 1),
71 1.0 / (4 * 3 * 2 * 1),
72 -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
73 };
74
/* Encoding of the texcoord_mapping[] entries:
 *
 *    texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR}
 *
 * Bit 7 tells whether `index` counts from VARYING_SLOT_TEX0 or from
 * VARYING_SLOT_VAR0.
 */
#define TEXCOORD_TEX (0 << 7)
#define TEXCOORD_VAR (1 << 7)
78
79 static unsigned
80 get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord)
81 {
82 for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
83 if (p->texcoord_mapping[i] == texcoord)
84 return i;
85 }
86
87 /* blah */
88 return p->ctx->Const.MaxTextureCoordUnits - 1;
89 }
90
91 /**
92 * Retrieve a ureg for the given source register. Will emit
93 * constants, apply swizzling and negation as needed.
94 */
95 static GLuint
96 src_vector(struct i915_fragment_program *p,
97 const struct prog_src_register *source,
98 const struct gl_fragment_program *program)
99 {
100 GLuint src;
101 unsigned unit;
102
103 switch (source->File) {
104
105 /* Registers:
106 */
107 case PROGRAM_TEMPORARY:
108 if (source->Index >= I915_MAX_TEMPORARY) {
109 i915_program_error(p, "Exceeded max temporary reg: %d/%d",
110 source->Index, I915_MAX_TEMPORARY);
111 return 0;
112 }
113 src = UREG(REG_TYPE_R, source->Index);
114 break;
115 case PROGRAM_INPUT:
116 switch (source->Index) {
117 case VARYING_SLOT_POS:
118 src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
119 break;
120 case VARYING_SLOT_COL0:
121 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
122 break;
123 case VARYING_SLOT_COL1:
124 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
125 src = swizzle(src, X, Y, Z, ONE);
126 break;
127 case VARYING_SLOT_FOGC:
128 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
129 src = swizzle(src, W, ZERO, ZERO, ONE);
130 break;
131 case VARYING_SLOT_TEX0:
132 case VARYING_SLOT_TEX1:
133 case VARYING_SLOT_TEX2:
134 case VARYING_SLOT_TEX3:
135 case VARYING_SLOT_TEX4:
136 case VARYING_SLOT_TEX5:
137 case VARYING_SLOT_TEX6:
138 case VARYING_SLOT_TEX7:
139 unit = get_texcoord_mapping(p, (source->Index -
140 VARYING_SLOT_TEX0) | TEXCOORD_TEX);
141 src = i915_emit_decl(p, REG_TYPE_T,
142 T_TEX0 + unit,
143 D0_CHANNEL_ALL);
144 break;
145
146 case VARYING_SLOT_VAR0:
147 case VARYING_SLOT_VAR0 + 1:
148 case VARYING_SLOT_VAR0 + 2:
149 case VARYING_SLOT_VAR0 + 3:
150 case VARYING_SLOT_VAR0 + 4:
151 case VARYING_SLOT_VAR0 + 5:
152 case VARYING_SLOT_VAR0 + 6:
153 case VARYING_SLOT_VAR0 + 7:
154 unit = get_texcoord_mapping(p, (source->Index -
155 VARYING_SLOT_VAR0) | TEXCOORD_VAR);
156 src = i915_emit_decl(p, REG_TYPE_T,
157 T_TEX0 + unit,
158 D0_CHANNEL_ALL);
159 break;
160
161 default:
162 i915_program_error(p, "Bad source->Index: %d", source->Index);
163 return 0;
164 }
165 break;
166
167 case PROGRAM_OUTPUT:
168 switch (source->Index) {
169 case FRAG_RESULT_COLOR:
170 case FRAG_RESULT_DATA0:
171 src = UREG(REG_TYPE_OC, 0);
172 break;
173 case FRAG_RESULT_DEPTH:
174 src = UREG(REG_TYPE_OD, 0);
175 break;
176 default:
177 i915_program_error(p, "Bad source->Index: %d", source->Index);
178 return 0;
179 }
180 break;
181
182 /* Various paramters and env values. All emitted to
183 * hardware as program constants.
184 */
185 case PROGRAM_CONSTANT:
186 case PROGRAM_STATE_VAR:
187 case PROGRAM_UNIFORM:
188 src = i915_emit_param4fv(p,
189 &program->Base.Parameters->ParameterValues[source->Index][0].f);
190 break;
191
192 default:
193 i915_program_error(p, "Bad source->File: %d", source->File);
194 return 0;
195 }
196
197 src = swizzle(src,
198 GET_SWZ(source->Swizzle, 0),
199 GET_SWZ(source->Swizzle, 1),
200 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));
201
202 if (source->Negate)
203 src = negate(src,
204 GET_BIT(source->Negate, 0),
205 GET_BIT(source->Negate, 1),
206 GET_BIT(source->Negate, 2),
207 GET_BIT(source->Negate, 3));
208
209 return src;
210 }
211
212
213 static GLuint
214 get_result_vector(struct i915_fragment_program *p,
215 const struct prog_instruction *inst)
216 {
217 switch (inst->DstReg.File) {
218 case PROGRAM_OUTPUT:
219 switch (inst->DstReg.Index) {
220 case FRAG_RESULT_COLOR:
221 case FRAG_RESULT_DATA0:
222 return UREG(REG_TYPE_OC, 0);
223 case FRAG_RESULT_DEPTH:
224 p->depth_written = 1;
225 return UREG(REG_TYPE_OD, 0);
226 default:
227 i915_program_error(p, "Bad inst->DstReg.Index: %d",
228 inst->DstReg.Index);
229 return 0;
230 }
231 case PROGRAM_TEMPORARY:
232 return UREG(REG_TYPE_R, inst->DstReg.Index);
233 default:
234 i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File);
235 return 0;
236 }
237 }
238
239 static GLuint
240 get_result_flags(const struct prog_instruction *inst)
241 {
242 GLuint flags = 0;
243
244 if (inst->Saturate)
245 flags |= A0_DEST_SATURATE;
246 if (inst->DstReg.WriteMask & WRITEMASK_X)
247 flags |= A0_DEST_CHANNEL_X;
248 if (inst->DstReg.WriteMask & WRITEMASK_Y)
249 flags |= A0_DEST_CHANNEL_Y;
250 if (inst->DstReg.WriteMask & WRITEMASK_Z)
251 flags |= A0_DEST_CHANNEL_Z;
252 if (inst->DstReg.WriteMask & WRITEMASK_W)
253 flags |= A0_DEST_CHANNEL_W;
254
255 return flags;
256 }
257
258 static GLuint
259 translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
260 {
261 switch (bit) {
262 case TEXTURE_1D_INDEX:
263 return D0_SAMPLE_TYPE_2D;
264 case TEXTURE_2D_INDEX:
265 return D0_SAMPLE_TYPE_2D;
266 case TEXTURE_RECT_INDEX:
267 return D0_SAMPLE_TYPE_2D;
268 case TEXTURE_3D_INDEX:
269 return D0_SAMPLE_TYPE_VOLUME;
270 case TEXTURE_CUBE_INDEX:
271 return D0_SAMPLE_TYPE_CUBE;
272 default:
273 i915_program_error(p, "TexSrcBit: %d", bit);
274 return 0;
275 }
276 }
277
/* Emit a texture instruction with opcode OP for the current instruction.
 * Expects `p` and `inst` to be in scope at the expansion site.  The
 * sampler is declared first, then the coordinate operand is fetched, then
 * the texel lookup is emitted.
 */
#define EMIT_TEX( OP )                                                   \
do {                                                                     \
   GLuint tex_dim = translate_tex_src_target( p, inst->TexSrcTarget );   \
   const struct gl_fragment_program *tex_prog = &p->FragProg;            \
   GLuint tex_unit = tex_prog->Base.SamplerUnits[inst->TexSrcUnit];      \
   GLuint tex_sampler = i915_emit_decl( p, REG_TYPE_S,                   \
                                        tex_unit, tex_dim );             \
   GLuint tex_coord = src_vector( p, &inst->SrcReg[0], tex_prog );       \
                                                                         \
   i915_emit_texld( p, get_live_regs( p, inst ),                         \
                    get_result_vector( p, inst ),                        \
                    get_result_flags( inst ),                            \
                    tex_sampler,                                         \
                    tex_coord,                                           \
                    OP );                                                \
} while (0)

/* Emit an arithmetic instruction with opcode OP that consumes the first
 * N source operands of the current instruction; unused operand slots are
 * passed as 0.  Expects `p`, `inst` and `program` to be in scope at the
 * expansion site.
 */
#define EMIT_ARITH( OP, N )                                              \
do {                                                                     \
   i915_emit_arith( p,                                                   \
                    OP,                                                  \
                    get_result_vector( p, inst ),                        \
                    get_result_flags( inst ), 0,                         \
                    ((N) < 1) ? 0 : src_vector( p, &inst->SrcReg[0], program), \
                    ((N) < 2) ? 0 : src_vector( p, &inst->SrcReg[1], program), \
                    ((N) < 3) ? 0 : src_vector( p, &inst->SrcReg[2], program)); \
} while (0)

#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
310
311 /*
312 * TODO: consider moving this into core
313 */
314 static bool calc_live_regs( struct i915_fragment_program *p )
315 {
316 const struct gl_fragment_program *program = &p->FragProg;
317 GLuint regsUsed = ~((1 << I915_MAX_TEMPORARY) - 1);
318 uint8_t live_components[I915_MAX_TEMPORARY] = { 0, };
319 GLint i;
320
321 for (i = program->Base.NumInstructions - 1; i >= 0; i--) {
322 struct prog_instruction *inst = &program->Base.Instructions[i];
323 int opArgs = _mesa_num_inst_src_regs(inst->Opcode);
324 int a;
325
326 /* Register is written to: unmark as live for this and preceeding ops */
327 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
328 if (inst->DstReg.Index >= I915_MAX_TEMPORARY)
329 return false;
330
331 live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
332 if (live_components[inst->DstReg.Index] == 0)
333 regsUsed &= ~(1 << inst->DstReg.Index);
334 }
335
336 for (a = 0; a < opArgs; a++) {
337 /* Register is read from: mark as live for this and preceeding ops */
338 if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
339 unsigned c;
340
341 if (inst->SrcReg[a].Index >= I915_MAX_TEMPORARY)
342 return false;
343
344 regsUsed |= 1 << inst->SrcReg[a].Index;
345
346 for (c = 0; c < 4; c++) {
347 const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c);
348
349 if (field <= SWIZZLE_W)
350 live_components[inst->SrcReg[a].Index] |= (1U << field);
351 }
352 }
353 }
354
355 p->usedRegs[i] = regsUsed;
356 }
357
358 return true;
359 }
360
361 static GLuint get_live_regs( struct i915_fragment_program *p,
362 const struct prog_instruction *inst )
363 {
364 const struct gl_fragment_program *program = &p->FragProg;
365 GLuint nr = inst - program->Base.Instructions;
366
367 return p->usedRegs[nr];
368 }
369
370
371 /* Possible concerns:
372 *
373 * SIN, COS -- could use another taylor step?
374 * LIT -- results seem a little different to sw mesa
375 * LOG -- different to mesa on negative numbers, but this is conformant.
376 *
377 * Parse failures -- Mesa doesn't currently give a good indication
378 * internally whether a particular program string parsed or not. This
379 * can lead to confusion -- hopefully we cope with it ok now.
380 *
381 */
382 static void
383 upload_program(struct i915_fragment_program *p)
384 {
385 const struct gl_fragment_program *program = &p->FragProg;
386 const struct prog_instruction *inst = program->Base.Instructions;
387
388 if (INTEL_DEBUG & DEBUG_WM)
389 _mesa_print_program(&program->Base);
390
391 /* Is this a parse-failed program? Ensure a valid program is
392 * loaded, as the flagging of an error isn't sufficient to stop
393 * this being uploaded to hardware.
394 */
395 if (inst[0].Opcode == OPCODE_END) {
396 GLuint tmp = i915_get_utemp(p);
397 i915_emit_arith(p,
398 A0_MOV,
399 UREG(REG_TYPE_OC, 0),
400 A0_DEST_CHANNEL_ALL, 0,
401 swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
402 return;
403 }
404
405 if (program->Base.NumInstructions > I915_MAX_INSN) {
406 i915_program_error(p, "Exceeded max instructions (%d out of %d)",
407 program->Base.NumInstructions, I915_MAX_INSN);
408 return;
409 }
410
411 /* Not always needed:
412 */
413 if (!calc_live_regs(p)) {
414 i915_program_error(p, "Could not allocate registers");
415 return;
416 }
417
418 while (1) {
419 GLuint src0, src1, src2, flags;
420 GLuint tmp = 0, dst, consts0 = 0, consts1 = 0;
421
422 switch (inst->Opcode) {
423 case OPCODE_ABS:
424 src0 = src_vector(p, &inst->SrcReg[0], program);
425 i915_emit_arith(p,
426 A0_MAX,
427 get_result_vector(p, inst),
428 get_result_flags(inst), 0,
429 src0, negate(src0, 1, 1, 1, 1), 0);
430 break;
431
432 case OPCODE_ADD:
433 EMIT_2ARG_ARITH(A0_ADD);
434 break;
435
436 case OPCODE_CMP:
437 src0 = src_vector(p, &inst->SrcReg[0], program);
438 src1 = src_vector(p, &inst->SrcReg[1], program);
439 src2 = src_vector(p, &inst->SrcReg[2], program);
440 i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */
441 break;
442
443 case OPCODE_COS:
444 src0 = src_vector(p, &inst->SrcReg[0], program);
445 tmp = i915_get_utemp(p);
446 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
447 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
448
449 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
450 i915_emit_arith(p,
451 A0_MAD,
452 tmp, A0_DEST_CHANNEL_X, 0,
453 src0,
454 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
455 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */
456
457 i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
458
459 i915_emit_arith(p,
460 A0_MAD,
461 tmp, A0_DEST_CHANNEL_X, 0,
462 tmp,
463 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
464 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
465
466 /* Compute COS with the same calculation used for SIN, but a
467 * different source range has been mapped to [-1,1] this time.
468 */
469
470 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
471 i915_emit_arith(p,
472 A0_MAX,
473 tmp, A0_DEST_CHANNEL_Y, 0,
474 swizzle(tmp, ZERO, X, ZERO, ZERO),
475 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
476 0);
477
478 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
479 i915_emit_arith(p,
480 A0_MUL,
481 tmp, A0_DEST_CHANNEL_Y, 0,
482 swizzle(tmp, ZERO, X, ZERO, ZERO),
483 tmp,
484 0);
485
486 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
487 i915_emit_arith(p,
488 A0_DP3,
489 tmp, A0_DEST_CHANNEL_X, 0,
490 tmp,
491 swizzle(consts1, X, Y, ZERO, ZERO),
492 0);
493
494 /* tmp.x now contains a first approximation (y). Now, weight it
495 * against tmp.y**2 to get closer.
496 */
497 i915_emit_arith(p,
498 A0_MAX,
499 tmp, A0_DEST_CHANNEL_Y, 0,
500 swizzle(tmp, ZERO, X, ZERO, ZERO),
501 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
502 0);
503
504 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
505 i915_emit_arith(p,
506 A0_MAD,
507 tmp, A0_DEST_CHANNEL_Y, 0,
508 swizzle(tmp, ZERO, X, ZERO, ZERO),
509 swizzle(tmp, ZERO, Y, ZERO, ZERO),
510 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
511
512 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
513 i915_emit_arith(p,
514 A0_MAD,
515 get_result_vector(p, inst),
516 get_result_flags(inst), 0,
517 swizzle(consts1, W, W, W, W),
518 swizzle(tmp, Y, Y, Y, Y),
519 swizzle(tmp, X, X, X, X));
520 break;
521
522 case OPCODE_DP2:
523 src0 = src_vector(p, &inst->SrcReg[0], program);
524 src1 = src_vector(p, &inst->SrcReg[1], program);
525 i915_emit_arith(p,
526 A0_DP3,
527 get_result_vector(p, inst),
528 get_result_flags(inst), 0,
529 swizzle(src0, X, Y, ZERO, ZERO),
530 swizzle(src1, X, Y, ZERO, ZERO),
531 0);
532 break;
533
534 case OPCODE_DP3:
535 EMIT_2ARG_ARITH(A0_DP3);
536 break;
537
538 case OPCODE_DP4:
539 EMIT_2ARG_ARITH(A0_DP4);
540 break;
541
542 case OPCODE_DPH:
543 src0 = src_vector(p, &inst->SrcReg[0], program);
544 src1 = src_vector(p, &inst->SrcReg[1], program);
545
546 i915_emit_arith(p,
547 A0_DP4,
548 get_result_vector(p, inst),
549 get_result_flags(inst), 0,
550 swizzle(src0, X, Y, Z, ONE), src1, 0);
551 break;
552
553 case OPCODE_DST:
554 src0 = src_vector(p, &inst->SrcReg[0], program);
555 src1 = src_vector(p, &inst->SrcReg[1], program);
556
557 /* result[0] = 1 * 1;
558 * result[1] = a[1] * b[1];
559 * result[2] = a[2] * 1;
560 * result[3] = 1 * b[3];
561 */
562 i915_emit_arith(p,
563 A0_MUL,
564 get_result_vector(p, inst),
565 get_result_flags(inst), 0,
566 swizzle(src0, ONE, Y, Z, ONE),
567 swizzle(src1, ONE, Y, ONE, W), 0);
568 break;
569
570 case OPCODE_EX2:
571 src0 = src_vector(p, &inst->SrcReg[0], program);
572
573 i915_emit_arith(p,
574 A0_EXP,
575 get_result_vector(p, inst),
576 get_result_flags(inst), 0,
577 swizzle(src0, X, X, X, X), 0, 0);
578 break;
579
580 case OPCODE_FLR:
581 EMIT_1ARG_ARITH(A0_FLR);
582 break;
583
584 case OPCODE_TRUNC:
585 EMIT_1ARG_ARITH(A0_TRC);
586 break;
587
588 case OPCODE_FRC:
589 EMIT_1ARG_ARITH(A0_FRC);
590 break;
591
592 case OPCODE_KIL:
593 src0 = src_vector(p, &inst->SrcReg[0], program);
594 tmp = i915_get_utemp(p);
595
596 i915_emit_texld(p, get_live_regs(p, inst),
597 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
598 0, src0, T0_TEXKILL);
599 break;
600
601 case OPCODE_KIL_NV:
602 if (inst->DstReg.CondMask == COND_TR) {
603 tmp = i915_get_utemp(p);
604
605 /* The KIL instruction discards the fragment if any component of
606 * the source is < 0. Emit an immediate operand of {-1}.xywz.
607 */
608 i915_emit_texld(p, get_live_regs(p, inst),
609 tmp, A0_DEST_CHANNEL_ALL,
610 0, /* use a dummy dest reg */
611 negate(swizzle(tmp, ONE, ONE, ONE, ONE),
612 1, 1, 1, 1),
613 T0_TEXKILL);
614 } else {
615 p->error = 1;
616 i915_program_error(p, "Unsupported KIL_NV condition code: %d",
617 inst->DstReg.CondMask);
618 }
619 break;
620
621 case OPCODE_LG2:
622 src0 = src_vector(p, &inst->SrcReg[0], program);
623
624 i915_emit_arith(p,
625 A0_LOG,
626 get_result_vector(p, inst),
627 get_result_flags(inst), 0,
628 swizzle(src0, X, X, X, X), 0, 0);
629 break;
630
631 case OPCODE_LIT:
632 src0 = src_vector(p, &inst->SrcReg[0], program);
633 tmp = i915_get_utemp(p);
634
635 /* tmp = max( a.xyzw, a.00zw )
636 * XXX: Clamp tmp.w to -128..128
637 * tmp.y = log(tmp.y)
638 * tmp.y = tmp.w * tmp.y
639 * tmp.y = exp(tmp.y)
640 * result = cmp (a.11-x1, a.1x01, a.1xy1 )
641 */
642 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
643 src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
644
645 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
646 swizzle(tmp, Y, Y, Y, Y), 0, 0);
647
648 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
649 swizzle(tmp, ZERO, Y, ZERO, ZERO),
650 swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
651
652 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
653 swizzle(tmp, Y, Y, Y, Y), 0, 0);
654
655 i915_emit_arith(p, A0_CMP,
656 get_result_vector(p, inst),
657 get_result_flags(inst), 0,
658 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
659 swizzle(tmp, ONE, X, ZERO, ONE),
660 swizzle(tmp, ONE, X, Y, ONE));
661
662 break;
663
664 case OPCODE_LRP:
665 src0 = src_vector(p, &inst->SrcReg[0], program);
666 src1 = src_vector(p, &inst->SrcReg[1], program);
667 src2 = src_vector(p, &inst->SrcReg[2], program);
668 flags = get_result_flags(inst);
669 tmp = i915_get_utemp(p);
670
671 /* b*a + c*(1-a)
672 *
673 * b*a + c - ca
674 *
675 * tmp = b*a + c,
676 * result = (-c)*a + tmp
677 */
678 i915_emit_arith(p, A0_MAD, tmp,
679 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
680
681 i915_emit_arith(p, A0_MAD,
682 get_result_vector(p, inst),
683 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
684 break;
685
686 case OPCODE_MAD:
687 EMIT_3ARG_ARITH(A0_MAD);
688 break;
689
690 case OPCODE_MAX:
691 EMIT_2ARG_ARITH(A0_MAX);
692 break;
693
694 case OPCODE_MIN:
695 EMIT_2ARG_ARITH(A0_MIN);
696 break;
697
698 case OPCODE_MOV:
699 EMIT_1ARG_ARITH(A0_MOV);
700 break;
701
702 case OPCODE_MUL:
703 EMIT_2ARG_ARITH(A0_MUL);
704 break;
705
706 case OPCODE_POW:
707 src0 = src_vector(p, &inst->SrcReg[0], program);
708 src1 = src_vector(p, &inst->SrcReg[1], program);
709 tmp = i915_get_utemp(p);
710 flags = get_result_flags(inst);
711
712 /* XXX: masking on intermediate values, here and elsewhere.
713 */
714 i915_emit_arith(p,
715 A0_LOG,
716 tmp, A0_DEST_CHANNEL_X, 0,
717 swizzle(src0, X, X, X, X), 0, 0);
718
719 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
720
721
722 i915_emit_arith(p,
723 A0_EXP,
724 get_result_vector(p, inst),
725 flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
726
727 break;
728
729 case OPCODE_RCP:
730 src0 = src_vector(p, &inst->SrcReg[0], program);
731
732 i915_emit_arith(p,
733 A0_RCP,
734 get_result_vector(p, inst),
735 get_result_flags(inst), 0,
736 swizzle(src0, X, X, X, X), 0, 0);
737 break;
738
739 case OPCODE_RSQ:
740
741 src0 = src_vector(p, &inst->SrcReg[0], program);
742
743 i915_emit_arith(p,
744 A0_RSQ,
745 get_result_vector(p, inst),
746 get_result_flags(inst), 0,
747 swizzle(src0, X, X, X, X), 0, 0);
748 break;
749
750 case OPCODE_SCS:
751 src0 = src_vector(p, &inst->SrcReg[0], program);
752 tmp = i915_get_utemp(p);
753
754 /*
755 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
756 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
757 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
758 * scs.x = DP4 t1, sin_constants
759 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
760 * scs.y = DP4 t1, cos_constants
761 */
762 i915_emit_arith(p,
763 A0_MUL,
764 tmp, A0_DEST_CHANNEL_XY, 0,
765 swizzle(src0, X, X, ONE, ONE),
766 swizzle(src0, X, ONE, ONE, ONE), 0);
767
768 i915_emit_arith(p,
769 A0_MUL,
770 tmp, A0_DEST_CHANNEL_ALL, 0,
771 swizzle(tmp, X, Y, X, Y),
772 swizzle(tmp, X, X, ONE, ONE), 0);
773
774 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
775 GLuint tmp1;
776
777 if (inst->DstReg.WriteMask & WRITEMASK_X)
778 tmp1 = i915_get_utemp(p);
779 else
780 tmp1 = tmp;
781
782 i915_emit_arith(p,
783 A0_MUL,
784 tmp1, A0_DEST_CHANNEL_ALL, 0,
785 swizzle(tmp, X, Y, Y, W),
786 swizzle(tmp, X, Z, ONE, ONE), 0);
787
788 i915_emit_arith(p,
789 A0_DP4,
790 get_result_vector(p, inst),
791 A0_DEST_CHANNEL_Y, 0,
792 swizzle(tmp1, W, Z, Y, X),
793 i915_emit_const4fv(p, sin_constants), 0);
794 }
795
796 if (inst->DstReg.WriteMask & WRITEMASK_X) {
797 i915_emit_arith(p,
798 A0_MUL,
799 tmp, A0_DEST_CHANNEL_XYZ, 0,
800 swizzle(tmp, X, X, Z, ONE),
801 swizzle(tmp, Z, ONE, ONE, ONE), 0);
802
803 i915_emit_arith(p,
804 A0_DP4,
805 get_result_vector(p, inst),
806 A0_DEST_CHANNEL_X, 0,
807 swizzle(tmp, ONE, Z, Y, X),
808 i915_emit_const4fv(p, cos_constants), 0);
809 }
810 break;
811
812 case OPCODE_SEQ:
813 tmp = i915_get_utemp(p);
814 flags = get_result_flags(inst);
815 dst = get_result_vector(p, inst);
816
817 /* If both operands are uniforms or constants, we get 5 instructions
818 * like:
819 *
820 * U[1] = MOV CONST[1]
821 * U[0].xyz = SGE CONST[0].xxxx, U[1]
822 * U[1] = MOV CONST[1].-x-y-z-w
823 * R[0].xyz = SGE CONST[0].-x-x-x-x, U[1]
824 * R[0].xyz = MUL R[0], U[0]
825 *
826 * This code is stupid. Instead of having the individual calls to
827 * i915_emit_arith generate the moves to utemps, do it in the caller.
828 * This results in code like:
829 *
830 * U[1] = MOV CONST[1]
831 * U[0].xyz = SGE CONST[0].xxxx, U[1]
832 * R[0].xyz = SGE CONST[0].-x-x-x-x, U[1].-x-y-z-w
833 * R[0].xyz = MUL R[0], U[0]
834 */
835 src0 = src_vector(p, &inst->SrcReg[0], program);
836 src1 = src_vector(p, &inst->SrcReg[1], program);
837
838 if (GET_UREG_TYPE(src0) == REG_TYPE_CONST
839 && GET_UREG_TYPE(src1) == REG_TYPE_CONST) {
840 unsigned tmp = i915_get_utemp(p);
841
842 i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
843 src1, 0, 0);
844
845 src1 = tmp;
846 }
847
848 /* tmp = src1 >= src2 */
849 i915_emit_arith(p,
850 A0_SGE,
851 tmp,
852 flags, 0,
853 src0,
854 src1,
855 0);
856 /* dst = src1 <= src2 */
857 i915_emit_arith(p,
858 A0_SGE,
859 dst,
860 flags, 0,
861 negate(src0, 1, 1, 1, 1),
862 negate(src1, 1, 1, 1, 1),
863 0);
864 /* dst = tmp && dst */
865 i915_emit_arith(p,
866 A0_MUL,
867 dst,
868 flags, 0,
869 dst,
870 tmp,
871 0);
872 break;
873
874 case OPCODE_SIN:
875 src0 = src_vector(p, &inst->SrcReg[0], program);
876 tmp = i915_get_utemp(p);
877 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
878 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
879
880 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
881 i915_emit_arith(p,
882 A0_MAD,
883 tmp, A0_DEST_CHANNEL_X, 0,
884 src0,
885 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
886 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */
887
888 i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
889
890 i915_emit_arith(p,
891 A0_MAD,
892 tmp, A0_DEST_CHANNEL_X, 0,
893 tmp,
894 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
895 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
896
897 /* Compute sin using a quadratic and quartic. It gives continuity
898 * that repeating the Taylor series lacks every 2*pi, and has
899 * reduced error.
900 *
901 * The idea was described at:
902 * http://www.devmaster.net/forums/showthread.php?t=5784
903 */
904
905 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
906 i915_emit_arith(p,
907 A0_MAX,
908 tmp, A0_DEST_CHANNEL_Y, 0,
909 swizzle(tmp, ZERO, X, ZERO, ZERO),
910 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
911 0);
912
913 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
914 i915_emit_arith(p,
915 A0_MUL,
916 tmp, A0_DEST_CHANNEL_Y, 0,
917 swizzle(tmp, ZERO, X, ZERO, ZERO),
918 tmp,
919 0);
920
921 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
922 i915_emit_arith(p,
923 A0_DP3,
924 tmp, A0_DEST_CHANNEL_X, 0,
925 tmp,
926 swizzle(consts1, X, Y, ZERO, ZERO),
927 0);
928
929 /* tmp.x now contains a first approximation (y). Now, weight it
930 * against tmp.y**2 to get closer.
931 */
932 i915_emit_arith(p,
933 A0_MAX,
934 tmp, A0_DEST_CHANNEL_Y, 0,
935 swizzle(tmp, ZERO, X, ZERO, ZERO),
936 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
937 0);
938
939 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
940 i915_emit_arith(p,
941 A0_MAD,
942 tmp, A0_DEST_CHANNEL_Y, 0,
943 swizzle(tmp, ZERO, X, ZERO, ZERO),
944 swizzle(tmp, ZERO, Y, ZERO, ZERO),
945 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
946
947 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
948 i915_emit_arith(p,
949 A0_MAD,
950 get_result_vector(p, inst),
951 get_result_flags(inst), 0,
952 swizzle(consts1, W, W, W, W),
953 swizzle(tmp, Y, Y, Y, Y),
954 swizzle(tmp, X, X, X, X));
955
956 break;
957
958 case OPCODE_SGE:
959 EMIT_2ARG_ARITH(A0_SGE);
960 break;
961
962 case OPCODE_SGT:
963 i915_emit_arith(p,
964 A0_SLT,
965 get_result_vector( p, inst ),
966 get_result_flags( inst ), 0,
967 negate(src_vector( p, &inst->SrcReg[0], program),
968 1, 1, 1, 1),
969 negate(src_vector( p, &inst->SrcReg[1], program),
970 1, 1, 1, 1),
971 0);
972 break;
973
974 case OPCODE_SLE:
975 i915_emit_arith(p,
976 A0_SGE,
977 get_result_vector( p, inst ),
978 get_result_flags( inst ), 0,
979 negate(src_vector( p, &inst->SrcReg[0], program),
980 1, 1, 1, 1),
981 negate(src_vector( p, &inst->SrcReg[1], program),
982 1, 1, 1, 1),
983 0);
984 break;
985
986 case OPCODE_SLT:
987 EMIT_2ARG_ARITH(A0_SLT);
988 break;
989
990 case OPCODE_SNE:
991 tmp = i915_get_utemp(p);
992 flags = get_result_flags(inst);
993 dst = get_result_vector(p, inst);
994
995 /* If both operands are uniforms or constants, we get 5 instructions
996 * like:
997 *
998 * U[1] = MOV CONST[1]
999 * U[0].xyz = SLT CONST[0].xxxx, U[1]
1000 * U[1] = MOV CONST[1].-x-y-z-w
1001 * R[0].xyz = SLT CONST[0].-x-x-x-x, U[1]
1002 * R[0].xyz = MUL R[0], U[0]
1003 *
1004 * This code is stupid. Instead of having the individual calls to
1005 * i915_emit_arith generate the moves to utemps, do it in the caller.
1006 * This results in code like:
1007 *
1008 * U[1] = MOV CONST[1]
1009 * U[0].xyz = SLT CONST[0].xxxx, U[1]
1010 * R[0].xyz = SLT CONST[0].-x-x-x-x, U[1].-x-y-z-w
1011 * R[0].xyz = MUL R[0], U[0]
1012 */
1013 src0 = src_vector(p, &inst->SrcReg[0], program);
1014 src1 = src_vector(p, &inst->SrcReg[1], program);
1015
1016 if (GET_UREG_TYPE(src0) == REG_TYPE_CONST
1017 && GET_UREG_TYPE(src1) == REG_TYPE_CONST) {
1018 unsigned tmp = i915_get_utemp(p);
1019
1020 i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
1021 src1, 0, 0);
1022
1023 src1 = tmp;
1024 }
1025
1026 /* tmp = src1 < src2 */
1027 i915_emit_arith(p,
1028 A0_SLT,
1029 tmp,
1030 flags, 0,
1031 src0,
1032 src1,
1033 0);
1034 /* dst = src1 > src2 */
1035 i915_emit_arith(p,
1036 A0_SLT,
1037 dst,
1038 flags, 0,
1039 negate(src0, 1, 1, 1, 1),
1040 negate(src1, 1, 1, 1, 1),
1041 0);
1042 /* dst = tmp || dst */
1043 i915_emit_arith(p,
1044 A0_ADD,
1045 dst,
1046 flags | A0_DEST_SATURATE, 0,
1047 dst,
1048 tmp,
1049 0);
1050 break;
1051
1052 case OPCODE_SSG:
1053 dst = get_result_vector(p, inst);
1054 flags = get_result_flags(inst);
1055 src0 = src_vector(p, &inst->SrcReg[0], program);
1056 tmp = i915_get_utemp(p);
1057
1058 /* tmp = (src < 0.0) */
1059 i915_emit_arith(p,
1060 A0_SLT,
1061 tmp,
1062 flags, 0,
1063 src0,
1064 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
1065 0);
1066
1067 /* dst = (0.0 < src) */
1068 i915_emit_arith(p,
1069 A0_SLT,
1070 dst,
1071 flags, 0,
1072 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
1073 src0,
1074 0);
1075
1076 /* dst = (src > 0.0) - (src < 0.0) */
1077 i915_emit_arith(p,
1078 A0_ADD,
1079 dst,
1080 flags, 0,
1081 dst,
1082 negate(tmp, 1, 1, 1, 1),
1083 0);
1084
1085 break;
1086
1087 case OPCODE_SUB:
1088 src0 = src_vector(p, &inst->SrcReg[0], program);
1089 src1 = src_vector(p, &inst->SrcReg[1], program);
1090
1091 i915_emit_arith(p,
1092 A0_ADD,
1093 get_result_vector(p, inst),
1094 get_result_flags(inst), 0,
1095 src0, negate(src1, 1, 1, 1, 1), 0);
1096 break;
1097
1098 case OPCODE_SWZ:
1099 EMIT_1ARG_ARITH(A0_MOV); /* extended swizzle handled natively */
1100 break;
1101
1102 case OPCODE_TEX:
1103 EMIT_TEX(T0_TEXLD);
1104 break;
1105
1106 case OPCODE_TXB:
1107 EMIT_TEX(T0_TEXLDB);
1108 break;
1109
1110 case OPCODE_TXP:
1111 EMIT_TEX(T0_TEXLDP);
1112 break;
1113
1114 case OPCODE_XPD:
1115 /* Cross product:
1116 * result.x = src0.y * src1.z - src0.z * src1.y;
1117 * result.y = src0.z * src1.x - src0.x * src1.z;
1118 * result.z = src0.x * src1.y - src0.y * src1.x;
1119 * result.w = undef;
1120 */
1121 src0 = src_vector(p, &inst->SrcReg[0], program);
1122 src1 = src_vector(p, &inst->SrcReg[1], program);
1123 tmp = i915_get_utemp(p);
1124
1125 i915_emit_arith(p,
1126 A0_MUL,
1127 tmp, A0_DEST_CHANNEL_ALL, 0,
1128 swizzle(src0, Z, X, Y, ONE),
1129 swizzle(src1, Y, Z, X, ONE), 0);
1130
1131 i915_emit_arith(p,
1132 A0_MAD,
1133 get_result_vector(p, inst),
1134 get_result_flags(inst), 0,
1135 swizzle(src0, Y, Z, X, ONE),
1136 swizzle(src1, Z, X, Y, ONE),
1137 negate(tmp, 1, 1, 1, 0));
1138 break;
1139
1140 case OPCODE_END:
1141 return;
1142
1143 case OPCODE_BGNLOOP:
1144 case OPCODE_BGNSUB:
1145 case OPCODE_BRK:
1146 case OPCODE_CAL:
1147 case OPCODE_CONT:
1148 case OPCODE_DDX:
1149 case OPCODE_DDY:
1150 case OPCODE_ELSE:
1151 case OPCODE_ENDIF:
1152 case OPCODE_ENDLOOP:
1153 case OPCODE_ENDSUB:
1154 case OPCODE_IF:
1155 case OPCODE_RET:
1156 p->error = 1;
1157 i915_program_error(p, "Unsupported opcode: %s",
1158 _mesa_opcode_string(inst->Opcode));
1159 return;
1160
1161 case OPCODE_EXP:
1162 case OPCODE_LOG:
1163 /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in
1164 * prog_instruction.h, but apparently GLSL doesn't ever emit them.
1165 * Instead, it translates to EX2 or LG2.
1166 */
1167 case OPCODE_TXD:
1168 case OPCODE_TXL:
1169 /* These opcodes are claimed by GLSL in prog_instruction.h, but
1170 * only NV_vp/fp appears to emit them.
1171 */
1172 default:
1173 i915_program_error(p, "bad opcode: %s",
1174 _mesa_opcode_string(inst->Opcode));
1175 return;
1176 }
1177
1178 inst++;
1179 i915_release_utemps(p);
1180 }
1181 }
1182
1183 /* Rather than trying to intercept and jiggle depth writes during
1184 * emit, just move the value into its correct position at the end of
1185 * the program:
1186 */
1187 static void
1188 fixup_depth_write(struct i915_fragment_program *p)
1189 {
1190 if (p->depth_written) {
1191 GLuint depth = UREG(REG_TYPE_OD, 0);
1192
1193 i915_emit_arith(p,
1194 A0_MOV,
1195 depth, A0_DEST_CHANNEL_W, 0,
1196 swizzle(depth, X, Y, Z, Z), 0, 0);
1197 }
1198 }
1199
1200 static void
1201 check_texcoord_mapping(struct i915_fragment_program *p)
1202 {
1203 GLbitfield64 inputs = p->FragProg.Base.InputsRead;
1204 unsigned unit = 0;
1205
1206 for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1207 if (inputs & VARYING_BIT_TEX(i)) {
1208 if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
1209 unit++;
1210 break;
1211 }
1212 p->texcoord_mapping[unit++] = i | TEXCOORD_TEX;
1213 }
1214 if (inputs & VARYING_BIT_VAR(i)) {
1215 if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
1216 unit++;
1217 break;
1218 }
1219 p->texcoord_mapping[unit++] = i | TEXCOORD_VAR;
1220 }
1221 }
1222
1223 if (unit > p->ctx->Const.MaxTextureCoordUnits)
1224 i915_program_error(p, "Too many texcoord units");
1225 }
1226
1227 static void
1228 check_wpos(struct i915_fragment_program *p)
1229 {
1230 GLbitfield64 inputs = p->FragProg.Base.InputsRead;
1231 GLint i;
1232 unsigned unit = 0;
1233
1234 p->wpos_tex = -1;
1235
1236 if ((inputs & VARYING_BIT_POS) == 0)
1237 return;
1238
1239 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1240 unit += !!(inputs & VARYING_BIT_TEX(i));
1241 unit += !!(inputs & VARYING_BIT_VAR(i));
1242 }
1243
1244 if (unit < p->ctx->Const.MaxTextureCoordUnits)
1245 p->wpos_tex = unit;
1246 else
1247 i915_program_error(p, "No free texcoord for wpos value");
1248 }
1249
1250
1251 static void
1252 translate_program(struct i915_fragment_program *p)
1253 {
1254 struct i915_context *i915 = I915_CONTEXT(p->ctx);
1255
1256 if (INTEL_DEBUG & DEBUG_WM) {
1257 printf("fp:\n");
1258 _mesa_print_program(&p->FragProg.Base);
1259 printf("\n");
1260 }
1261
1262 i915_init_program(i915, p);
1263 check_texcoord_mapping(p);
1264 check_wpos(p);
1265 upload_program(p);
1266 fixup_depth_write(p);
1267 i915_fini_program(p);
1268
1269 p->translated = 1;
1270 }
1271
1272
1273 static void
1274 track_params(struct i915_fragment_program *p)
1275 {
1276 GLint i;
1277
1278 if (p->nr_params)
1279 _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
1280
1281 for (i = 0; i < p->nr_params; i++) {
1282 GLint reg = p->param[i].reg;
1283 COPY_4V(p->constant[reg], p->param[i].values);
1284 }
1285
1286 p->params_uptodate = 1;
1287 p->on_hardware = 0; /* overkill */
1288 }
1289
1290
1291 static void
1292 i915BindProgram(struct gl_context * ctx, GLenum target, struct gl_program *prog)
1293 {
1294 if (target == GL_FRAGMENT_PROGRAM_ARB) {
1295 struct i915_context *i915 = I915_CONTEXT(ctx);
1296 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1297
1298 if (i915->current_program == p)
1299 return;
1300
1301 if (i915->current_program) {
1302 i915->current_program->on_hardware = 0;
1303 i915->current_program->params_uptodate = 0;
1304 }
1305
1306 i915->current_program = p;
1307
1308 assert(p->on_hardware == 0);
1309 assert(p->params_uptodate == 0);
1310
1311 }
1312 }
1313
1314 static struct gl_program *
1315 i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id)
1316 {
1317 switch (target) {
1318 case GL_VERTEX_PROGRAM_ARB:
1319 return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program),
1320 target, id);
1321
1322 case GL_FRAGMENT_PROGRAM_ARB:{
1323 struct i915_fragment_program *prog =
1324 CALLOC_STRUCT(i915_fragment_program);
1325 if (prog) {
1326 i915_init_program(I915_CONTEXT(ctx), prog);
1327
1328 return _mesa_init_fragment_program(ctx, &prog->FragProg,
1329 target, id);
1330 }
1331 else
1332 return NULL;
1333 }
1334
1335 default:
1336 /* Just fallback:
1337 */
1338 return _mesa_new_program(ctx, target, id);
1339 }
1340 }
1341
1342 static void
1343 i915DeleteProgram(struct gl_context * ctx, struct gl_program *prog)
1344 {
1345 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1346 struct i915_context *i915 = I915_CONTEXT(ctx);
1347 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1348
1349 if (i915->current_program == p)
1350 i915->current_program = 0;
1351 }
1352
1353 _mesa_delete_program(ctx, prog);
1354 }
1355
1356
1357 static GLboolean
1358 i915IsProgramNative(struct gl_context * ctx, GLenum target, struct gl_program *prog)
1359 {
1360 if (target == GL_FRAGMENT_PROGRAM_ARB) {
1361 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1362
1363 if (!p->translated)
1364 translate_program(p);
1365
1366 return !p->error;
1367 }
1368 else
1369 return true;
1370 }
1371
1372 static GLboolean
1373 i915ProgramStringNotify(struct gl_context * ctx,
1374 GLenum target, struct gl_program *prog)
1375 {
1376 if (target == GL_FRAGMENT_PROGRAM_ARB) {
1377 struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
1378 p->translated = 0;
1379 }
1380
1381 (void) _tnl_program_string(ctx, target, prog);
1382
1383 /* XXX check if program is legal, within limits */
1384 return true;
1385 }
1386
1387 static void
1388 i915SamplerUniformChange(struct gl_context *ctx,
1389 GLenum target, struct gl_program *prog)
1390 {
1391 i915ProgramStringNotify(ctx, target, prog);
1392 }
1393
1394 void
1395 i915_update_program(struct gl_context *ctx)
1396 {
1397 struct intel_context *intel = intel_context(ctx);
1398 struct i915_context *i915 = i915_context(&intel->ctx);
1399 struct i915_fragment_program *fp =
1400 (struct i915_fragment_program *) ctx->FragmentProgram._Current;
1401
1402 if (i915->current_program != fp) {
1403 if (i915->current_program) {
1404 i915->current_program->on_hardware = 0;
1405 i915->current_program->params_uptodate = 0;
1406 }
1407
1408 i915->current_program = fp;
1409 }
1410
1411 if (!fp->translated)
1412 translate_program(fp);
1413
1414 FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error);
1415 }
1416
1417 void
1418 i915ValidateFragmentProgram(struct i915_context *i915)
1419 {
1420 struct gl_context *ctx = &i915->intel.ctx;
1421 struct intel_context *intel = intel_context(ctx);
1422 TNLcontext *tnl = TNL_CONTEXT(ctx);
1423 struct vertex_buffer *VB = &tnl->vb;
1424
1425 struct i915_fragment_program *p =
1426 (struct i915_fragment_program *) ctx->FragmentProgram._Current;
1427
1428 const GLbitfield64 inputsRead = p->FragProg.Base.InputsRead;
1429 GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
1430 GLuint s2 = S2_TEXCOORD_NONE;
1431 int i, offset = 0;
1432
1433 /* Important:
1434 */
1435 VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
1436
1437 if (!p->translated)
1438 translate_program(p);
1439
1440 intel->vertex_attr_count = 0;
1441 intel->wpos_offset = 0;
1442 intel->coloroffset = 0;
1443 intel->specoffset = 0;
1444
1445 if (inputsRead & VARYING_BITS_TEX_ANY || p->wpos_tex != -1) {
1446 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
1447 }
1448 else {
1449 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
1450 }
1451
1452 /* Handle gl_PointSize builtin var here */
1453 if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
1454 EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4);
1455
1456 if (inputsRead & VARYING_BIT_COL0) {
1457 intel->coloroffset = offset / 4;
1458 EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
1459 }
1460
1461 if (inputsRead & VARYING_BIT_COL1) {
1462 intel->specoffset = offset / 4;
1463 EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4);
1464 }
1465
1466 if ((inputsRead & VARYING_BIT_FOGC)) {
1467 EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
1468 }
1469
1470 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1471 if (inputsRead & VARYING_BIT_TEX(i)) {
1472 int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX);
1473 int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
1474
1475 s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
1476 s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
1477
1478 EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
1479 }
1480 if (inputsRead & VARYING_BIT_VAR(i)) {
1481 int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR);
1482 int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
1483
1484 s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
1485 s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
1486
1487 EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
1488 }
1489 if (i == p->wpos_tex) {
1490 int wpos_size = 4 * sizeof(float);
1491 /* If WPOS is required, duplicate the XYZ position data in an
1492 * unused texture coordinate:
1493 */
1494 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1495 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size));
1496
1497 intel->wpos_offset = offset;
1498 EMIT_PAD(wpos_size);
1499 }
1500 }
1501
1502 if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
1503 s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
1504 I915_STATECHANGE(i915, I915_UPLOAD_CTX);
1505
1506 /* Must do this *after* statechange, so as not to affect
1507 * buffered vertices reliant on the old state:
1508 */
1509 intel->vertex_size = _tnl_install_attrs(&intel->ctx,
1510 intel->vertex_attrs,
1511 intel->vertex_attr_count,
1512 intel->ViewportMatrix.m, 0);
1513
1514 assert(intel->prim.current_offset == intel->prim.start_offset);
1515 intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size;
1516 intel->prim.current_offset = intel->prim.start_offset;
1517
1518 intel->vertex_size >>= 2;
1519
1520 i915->state.Ctx[I915_CTXREG_LIS2] = s2;
1521 i915->state.Ctx[I915_CTXREG_LIS4] = s4;
1522
1523 assert(intel->vtbl.check_vertex_size(intel, intel->vertex_size));
1524 }
1525
1526 if (!p->params_uptodate)
1527 track_params(p);
1528
1529 if (!p->on_hardware)
1530 i915_upload_program(i915, p);
1531
1532 if (INTEL_DEBUG & DEBUG_WM) {
1533 printf("i915:\n");
1534 i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
1535 }
1536 }
1537
1538 void
1539 i915InitFragProgFuncs(struct dd_function_table *functions)
1540 {
1541 functions->BindProgram = i915BindProgram;
1542 functions->NewProgram = i915NewProgram;
1543 functions->DeleteProgram = i915DeleteProgram;
1544 functions->IsProgramNative = i915IsProgramNative;
1545 functions->ProgramStringNotify = i915ProgramStringNotify;
1546 functions->SamplerUniformChange = i915SamplerUniformChange;
1547 }