179bc58e9e924096fbe982cee0ca5ea9ab4f19ec
[mesa.git] / src / mesa / drivers / dri / r300 / r300_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /*
29 * Authors:
30 * Ben Skeggs <darktama@iinet.net.au>
31 * Jerome Glisse <j.glisse@gmail.com>
32 */
33
34 /*TODO'S
35 *
36 * - COS/SIN/SCS instructions
37 * - Depth write, WPOS/FOGC inputs
38 * - FogOption
39 * - Verify results of opcodes for accuracy, I've only checked them
40 * in specific cases.
41 * - and more...
42 */
43
44 #include "glheader.h"
45 #include "macros.h"
46 #include "enums.h"
47
48 #include "program.h"
49 #include "program_instruction.h"
50 #include "r300_context.h"
51 #include "r300_fragprog.h"
52 #include "r300_reg.h"
53
54 /*
55 * Useful macros and values
56 */
/* Report a compile error: prints "<file>::<function>(): <message>" followed
 * by a newline to stderr and marks the program as failed by setting
 * rp->error. A variable named `rp` must be in scope at the call site.
 * The macro appends its own "\n"; it relies on the GNU named-variadic
 * macro form (args...).
 */
57 #define ERROR(fmt, args...) do { \
58 fprintf(stderr, "%s::%s(): " fmt "\n", \
59 __FILE__, __func__, ##args); \
60 rp->error = GL_TRUE; \
61 } while(0)
62
/* All-ones marker. In this file it terminates the v_swiz[]/s_mask[] tables
 * and stands for "no such opcode" in r300_fpop[].
 */
63 #define PFS_INVAL 0xFFFFFFFF
/* Declares a local `cs` pointing at the compile state hanging off `rp`. */
64 #define COMPILE_STATE struct r300_pfs_compile_state *cs = rp->cs
65
/* Vector swizzle selectors. Each value is an index into the v_swiz[] table
 * defined further down, so the numbering has to follow that table's row
 * order. They are what gets stored in the VSWZ field of a packed register.
 */
66 #define SWIZZLE_XYZ 0
67 #define SWIZZLE_XXX 1
68 #define SWIZZLE_YYY 2
69 #define SWIZZLE_ZZZ 3
70 #define SWIZZLE_WWW 4
71 #define SWIZZLE_YZX 5
72 #define SWIZZLE_ZXY 6
73 #define SWIZZLE_WZY 7
74 #define SWIZZLE_111 8
75 #define SWIZZLE_000 9
76 #define SWIZZLE_HHH 10
77
/* Shorthand for do_swizzle() with no negation: the four selector names are
 * token-pasted onto SWIZZLE_ and packed three bits apiece, x in the lowest
 * bits. Needs `rp` in scope at the call site.
 * NOTE(review): the per-component selector names (presumably X/Y/Z/W/ZERO/ONE)
 * are not defined in this file - confirm they come from the Mesa headers.
 */
78 #define swizzle(r, x, y, z, w) do_swizzle(rp, r, \
79 ((SWIZZLE_##x<<0)| \
80 (SWIZZLE_##y<<3)| \
81 (SWIZZLE_##z<<6)| \
82 (SWIZZLE_##w<<9)), \
83 0)
84
/*
 * Packed register descriptor.
 *
 * A register reference is passed around as one unsigned word made of the
 * following bit-fields:
 *
 *   bits  0- 1  TYPE    register file (REG_TYPE_*)
 *   bits  2- 7  INDEX   register number within that file
 *   bits  8-12  VSWZ    vector (xyz) swizzle selector
 *   bits 13-17  SSWZ    scalar (w) swizzle selector
 *   bit     18  NEGV    negate the vector part
 *   bit     19  NEGS    negate the scalar part
 *   bit     20  ABS     take the absolute value
 *   bit     21  NO_USE  reading this reference must not count as a use
 *   bit     22  VALID   descriptor refers to a real register
 *
 * Every macro parameter is parenthesized so that compound arguments such
 * as `a | b` or `cond ? x : y` are packed/extracted as a whole.
 */
#define REG_TYPE_INPUT  0
#define REG_TYPE_OUTPUT 1
#define REG_TYPE_TEMP   2
#define REG_TYPE_CONST  3

#define REG_TYPE_SHIFT   0
#define REG_INDEX_SHIFT  2
#define REG_VSWZ_SHIFT   8
#define REG_SSWZ_SHIFT   13
#define REG_NEGV_SHIFT   18
#define REG_NEGS_SHIFT   19
#define REG_ABS_SHIFT    20
#define REG_NO_USE_SHIFT 21
#define REG_VALID_SHIFT  22

#define REG_TYPE_MASK   (0x03 << REG_TYPE_SHIFT)
#define REG_INDEX_MASK  (0x3F << REG_INDEX_SHIFT)
#define REG_VSWZ_MASK   (0x1F << REG_VSWZ_SHIFT)
#define REG_SSWZ_MASK   (0x1F << REG_SSWZ_SHIFT)
#define REG_NEGV_MASK   (0x01 << REG_NEGV_SHIFT)
#define REG_NEGS_MASK   (0x01 << REG_NEGS_SHIFT)
#define REG_ABS_MASK    (0x01 << REG_ABS_SHIFT)
#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT)
#define REG_VALID_MASK  (0x01 << REG_VALID_SHIFT)

/* Build a descriptor from its fields; negate/abs bits start out clear. */
#define REG(type, index, vswz, sswz, nouse, valid)			\
	((((type)  << REG_TYPE_SHIFT)   & REG_TYPE_MASK)   |		\
	 (((index) << REG_INDEX_SHIFT)  & REG_INDEX_MASK)  |		\
	 (((nouse) << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) |		\
	 (((valid) << REG_VALID_SHIFT)  & REG_VALID_MASK)  |		\
	 (((vswz)  << REG_VSWZ_SHIFT)   & REG_VSWZ_MASK)   |		\
	 (((sswz)  << REG_SSWZ_SHIFT)   & REG_SSWZ_MASK))

/* Field extractors: yield the field value shifted down to bit 0. */
#define REG_GET_TYPE(reg)						\
	(((reg) & REG_TYPE_MASK) >> REG_TYPE_SHIFT)
#define REG_GET_INDEX(reg)						\
	(((reg) & REG_INDEX_MASK) >> REG_INDEX_SHIFT)
#define REG_GET_VSWZ(reg)						\
	(((reg) & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT)
#define REG_GET_SSWZ(reg)						\
	(((reg) & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT)
#define REG_GET_NO_USE(reg)						\
	(((reg) & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT)
#define REG_GET_VALID(reg)						\
	(((reg) & REG_VALID_MASK) >> REG_VALID_SHIFT)

/* Field setters: `reg` must be a modifiable lvalue, it is assigned to. */
#define REG_SET_TYPE(reg, type)						\
	((reg) = (((reg) & ~REG_TYPE_MASK) |				\
		  (((type) << REG_TYPE_SHIFT) & REG_TYPE_MASK)))
#define REG_SET_INDEX(reg, index)					\
	((reg) = (((reg) & ~REG_INDEX_MASK) |				\
		  (((index) << REG_INDEX_SHIFT) & REG_INDEX_MASK)))
#define REG_SET_VSWZ(reg, vswz)						\
	((reg) = (((reg) & ~REG_VSWZ_MASK) |				\
		  (((vswz) << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)))
#define REG_SET_SSWZ(reg, sswz)						\
	((reg) = (((reg) & ~REG_SSWZ_MASK) |				\
		  (((sswz) << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)))
#define REG_SET_NO_USE(reg, nouse)					\
	((reg) = (((reg) & ~REG_NO_USE_MASK) |				\
		  (((nouse) << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)))
#define REG_SET_VALID(reg, valid)					\
	((reg) = (((reg) & ~REG_VALID_MASK) |				\
		  (((valid) << REG_VALID_SHIFT) & REG_VALID_MASK)))

/* Modifier setters: OR the flag into `reg` (an lvalue). */
#define REG_ABS(reg)							\
	((reg) = ((reg) | REG_ABS_MASK))
#define REG_NEGV(reg)							\
	((reg) = ((reg) | REG_NEGV_MASK))
#define REG_NEGS(reg)							\
	((reg) = ((reg) | REG_NEGS_MASK))
153
154
155 /*
156 * Data structures for fragment program generation
157 */
158
159 /* description of r300 native hw instructions */
/* One row per native ALU operation. emit_arith() indexes this table with
 * its `op` argument, so the row order has to agree with the PFS_OP_* values
 * used by callers (NOTE(review): those values are defined outside this
 * file - confirm the order).
 */
160 static const struct {
161 const char *name;
/* number of source operands the operation reads */
162 int argc;
/* opcode bits for the vector (colour) half of the instruction */
163 int v_op;
/* opcode bits for the scalar (alpha) half; PFS_INVAL when there is none */
164 int s_op;
165 } r300_fpop[] = {
166 { "MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD },
167 { "DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4 },
168 { "DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4 },
169 { "MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN },
170 { "MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX },
171 { "CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP },
172 { "FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC },
/* the next four are scalar-only operations: the vector half just
 * replicates the alpha result (REPL_ALPHA) */
173 { "EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2 },
174 { "LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2 },
175 { "RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP },
176 { "RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ },
177 { "REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL },
178 { "CMPH", 3, R300_FPI0_OUTC_CMPH, PFS_INVAL },
179 };
180
181
182 /* vector swizzles r300 can support natively, with a couple of
183 * cases we handle specially
184 *
185 * REG_VSWZ/REG_SSWZ is an index into this table
186 */
/* Source-slot usage flags. add_src() shifts these left by the source number
 * (0..2) to form the per-slot usage mask, which is why the scalar flag sits
 * three bits above the vector flag.
 */
187 #define SLOT_VECTOR (1<<0)
188 #define SLOT_SCALAR (1<<3)
189 #define SLOT_BOTH (SLOT_VECTOR | SLOT_SCALAR)
/* Build the 4-component swizzle word for an xyz pattern; the w position is
 * filled with SWIZZLE_ZERO. */
190 #define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
191 SWIZZLE_##y, \
192 SWIZZLE_##z, \
193 SWIZZLE_ZERO))
194 static const struct r300_pfs_swizzle {
195 GLuint hash; /* swizzle value this matches */
196 GLuint base; /* base value for hw swizzle */
197 GLuint stride; /* difference in base between arg0/1/2 */
/* SLOT_* bits: which source slot(s) an operand using this swizzle needs */
198 GLuint flags;
199 } v_swiz[] = {
200 /* native swizzles */
201 { MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_VECTOR },
202 { MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_VECTOR },
203 { MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_VECTOR },
204 { MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_VECTOR },
205 { MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SCALAR },
206 { MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_VECTOR },
207 { MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_VECTOR },
208 { MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_BOTH },
/* constant operands: no source register is consumed (flags 0) */
209 { MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0},
210 { MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0},
/* SWIZZLE_HHH: its hash is PFS_INVAL, so the matching loop in do_swizzle()
 * stops here and never selects it; it is only reached by explicit index */
211 { PFS_INVAL, R300_FPI0_ARGC_HALF, 0, 0},
/* terminator */
212 { PFS_INVAL, 0, 0, 0},
213 };
214
215 /* used during matching of non-native swizzles */
/* Bit ranges of the x/y/z/w selectors inside a packed swizzle word
 * (three bits per component). */
216 #define SWZ_X_MASK (7 << 0)
217 #define SWZ_Y_MASK (7 << 3)
218 #define SWZ_Z_MASK (7 << 6)
219 #define SWZ_W_MASK (7 << 9)
/* Component subsets that do_swizzle() tries in table order: all of xyz
 * first, then the pairs, then single components. The PFS_INVAL row ends
 * the table.
 */
220 static const struct {
221 GLuint hash; /* used to mask matching swizzle components */
222 int mask; /* actual outmask */
223 int count; /* count of components matched */
224 } s_mask[] = {
225 { SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK, 1|2|4, 3},
226 { SWZ_X_MASK|SWZ_Y_MASK, 1|2, 2},
227 { SWZ_X_MASK|SWZ_Z_MASK, 1|4, 2},
228 { SWZ_Y_MASK|SWZ_Z_MASK, 2|4, 2},
229 { SWZ_X_MASK, 1, 1},
230 { SWZ_Y_MASK, 2, 1},
231 { SWZ_Z_MASK, 4, 1},
232 { PFS_INVAL, PFS_INVAL, PFS_INVAL}
233 };
234
235 /* mapping from SWIZZLE_* to r300 native values for scalar insns */
/* Extra scalar selector for the constant 0.5; it is row 6 of s_swiz[].
 * NOTE(review): rows 0..5 presumably line up with Mesa's
 * SWIZZLE_X/Y/Z/W/ZERO/ONE values - confirm against the Mesa headers. */
236 #define SWIZZLE_HALF 6
/* Indexed by the SSWZ field of a packed register (see emit_arith()). */
237 static const struct {
238 int base; /* hw value of swizzle */
239 int stride; /* difference between SRC0/1/2 */
/* SLOT_* bits: source slot needed to read this component; 0 for constants */
240 GLuint flags;
241 } s_swiz[] = {
242 { R300_FPI2_ARGA_SRC0C_X, 3, SLOT_VECTOR },
243 { R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_VECTOR },
244 { R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_VECTOR },
245 { R300_FPI2_ARGA_SRC0A , 1, SLOT_SCALAR },
246 { R300_FPI2_ARGA_ZERO , 0, 0 },
247 { R300_FPI2_ARGA_ONE , 0, 0 },
248 { R300_FPI2_ARGA_HALF , 0, 0 }
249 };
250
251 /* boiler-plate reg, for convenience */
/* Template descriptor: temp 0 with identity swizzle (xyz / w) and the VALID
 * bit clear. Helpers start from this value and fill in type/index/valid;
 * returning it unchanged signals failure to the caller. */
252 static const GLuint undef = REG(REG_TYPE_TEMP,
253 0,
254 SWIZZLE_XYZ,
255 SWIZZLE_W,
256 GL_FALSE,
257 GL_FALSE);
258
259 /* constant one source */
/* The three constants below are CONST-typed descriptors whose swizzles
 * select rows of v_swiz[]/s_swiz[] flagged as needing no source slot; the
 * index field is left at 0. */
260 static const GLuint pfs_one = REG(REG_TYPE_CONST,
261 0,
262 SWIZZLE_111,
263 SWIZZLE_ONE,
264 GL_FALSE,
265 GL_TRUE);
266
267 /* constant half source */
268 static const GLuint pfs_half = REG(REG_TYPE_CONST,
269 0,
270 SWIZZLE_HHH,
271 SWIZZLE_HALF,
272 GL_FALSE,
273 GL_TRUE);
274
275 /* constant zero source */
276 static const GLuint pfs_zero = REG(REG_TYPE_CONST,
277 0,
278 SWIZZLE_000,
279 SWIZZLE_ZERO,
280 GL_FALSE,
281 GL_TRUE);
282
283 /*
284 * Common functions prototypes
285 */
/* Defined later in the file (not in this part of the source). */
286 static void dump_program(struct r300_fragment_program *rp);
/* Forward declaration: the swizzle helpers below emit MAD instructions
 * through emit_arith() before its definition appears. */
287 static void emit_arith(struct r300_fragment_program *rp, int op,
288 GLuint dest, int mask,
289 GLuint src0, GLuint src1, GLuint src2,
290 int flags);
291
292 /*
293 * Helper functions
294 */
295 static int get_hw_temp(struct r300_fragment_program *rp)
296 {
297 COMPILE_STATE;
298 int r = ffs(~cs->hwreg_in_use);
299 if (!r) {
300 ERROR("Out of hardware temps\n");
301 return 0;
302 }
303
304 cs->hwreg_in_use |= (1 << --r);
305 if (r > rp->max_temp_idx)
306 rp->max_temp_idx = r;
307
308 return r;
309 }
310
311 static int get_hw_temp_tex(struct r300_fragment_program *rp)
312 {
313 COMPILE_STATE;
314 int r;
315
316 r = ffs(~(cs->hwreg_in_use | cs->used_in_node));
317 if (!r)
318 return get_hw_temp(rp); /* Will cause an indirection */
319
320 cs->hwreg_in_use |= (1 << --r);
321 if (r > rp->max_temp_idx)
322 rp->max_temp_idx = r;
323
324 return r;
325 }
326
327 static void free_hw_temp(struct r300_fragment_program *rp, int idx)
328 {
329 COMPILE_STATE;
330 cs->hwreg_in_use &= ~(1<<idx);
331 }
332
333 static GLuint get_temp_reg(struct r300_fragment_program *rp)
334 {
335 COMPILE_STATE;
336 GLuint r = undef;
337 GLuint index;
338
339 index = ffs(~cs->temp_in_use);
340 if (!index) {
341 ERROR("Out of program temps\n");
342 return r;
343 }
344
345 cs->temp_in_use |= (1 << --index);
346 cs->temps[index].refcount = 0xFFFFFFFF;
347 cs->temps[index].reg = -1;
348
349 REG_SET_TYPE(r, REG_TYPE_TEMP);
350 REG_SET_INDEX(r, index);
351 REG_SET_VALID(r, GL_TRUE);
352 return r;
353 }
354
355 static GLuint get_temp_reg_tex(struct r300_fragment_program *rp)
356 {
357 COMPILE_STATE;
358 GLuint r = undef;
359 GLuint index;
360
361 index = ffs(~cs->temp_in_use);
362 if (!index) {
363 ERROR("Out of program temps\n");
364 return r;
365 }
366
367 cs->temp_in_use |= (1 << --index);
368 cs->temps[index].refcount = 0xFFFFFFFF;
369 cs->temps[index].reg = get_hw_temp_tex(rp);
370
371 REG_SET_TYPE(r, REG_TYPE_TEMP);
372 REG_SET_INDEX(r, index);
373 REG_SET_VALID(r, GL_TRUE);
374 return r;
375 }
376
377 static void free_temp(struct r300_fragment_program *rp, GLuint r)
378 {
379 COMPILE_STATE;
380 GLuint index = REG_GET_INDEX(r);
381
382 if (!(cs->temp_in_use & (1 << index)))
383 return;
384
385 if (REG_GET_TYPE(r) == REG_TYPE_TEMP) {
386 free_hw_temp(rp, cs->temps[index].reg);
387 cs->temps[index].reg = -1;
388 cs->temp_in_use &= ~(1 << index);
389 } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) {
390 free_hw_temp(rp, cs->inputs[index].reg);
391 cs->inputs[index].reg = -1;
392 }
393 }
394
395 static GLuint emit_param4fv(struct r300_fragment_program *rp,
396 GLfloat *values)
397 {
398 GLuint r = undef;
399 GLuint index;
400 int pidx;
401
402 pidx = rp->param_nr++;
403 index = rp->const_nr++;
404 if (pidx >= PFS_NUM_CONST_REGS || index >= PFS_NUM_CONST_REGS) {
405 ERROR("Out of const/param slots!\n");
406 return r;
407 }
408
409 rp->param[pidx].idx = index;
410 rp->param[pidx].values = values;
411 rp->params_uptodate = GL_FALSE;
412
413 REG_SET_TYPE(r, REG_TYPE_CONST);
414 REG_SET_INDEX(r, index);
415 REG_SET_VALID(r, GL_TRUE);
416 return r;
417 }
418
419 static GLuint emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp)
420 {
421 GLuint r = undef;
422 GLuint index;
423
424 index = rp->const_nr++;
425 if (index >= PFS_NUM_CONST_REGS) {
426 ERROR("Out of hw constants!\n");
427 return r;
428 }
429
430 COPY_4V(rp->constant[index], cp);
431
432 REG_SET_TYPE(r, REG_TYPE_CONST);
433 REG_SET_INDEX(r, index);
434 REG_SET_VALID(r, GL_TRUE);
435 return r;
436 }
437
438 static inline GLuint negate(GLuint r)
439 {
440 REG_NEGS(r);
441 REG_NEGV(r);
442 return r;
443 }
444
445 /* Hack, to prevent clobbering sources used multiple times when
446 * emulating non-native instructions
447 */
448 static inline GLuint keep(GLuint r)
449 {
450 REG_SET_NO_USE(r, GL_TRUE);
451 return r;
452 }
453
454 static inline GLuint absolute(GLuint r)
455 {
456 REG_ABS(r);
457 return r;
458 }
459
460 static int swz_native(struct r300_fragment_program *rp,
461 GLuint src,
462 GLuint *r,
463 GLuint arbneg)
464 {
465 /* Native swizzle, handle negation */
466 src = (src & ~REG_NEGS_SHIFT) |
467 (((arbneg >> 3) & 1) << REG_NEGS_SHIFT);
468
469 if ((arbneg & 0x7) == 0x0) {
470 src = src & ~REG_NEGV_MASK;
471 *r = src;
472 } else if ((arbneg & 0x7) == 0x7) {
473 src |= REG_NEGV_MASK;
474 *r = src;
475 } else {
476 if (!REG_GET_VALID(*r))
477 *r = get_temp_reg(rp);
478 src |= REG_NEGV_MASK;
479 emit_arith(rp,
480 PFS_OP_MAD,
481 *r,
482 arbneg & 0x7,
483 keep(src),
484 pfs_one,
485 pfs_zero,
486 0);
487 src = src & ~REG_NEGV_MASK;
488 emit_arith(rp,
489 PFS_OP_MAD,
490 *r,
491 (arbneg ^ 0x7) | WRITEMASK_W,
492 src,
493 pfs_one,
494 pfs_zero,
495 0);
496 }
497
498 return 3;
499 }
500
501 static int swz_emit_partial(struct r300_fragment_program *rp,
502 GLuint src,
503 GLuint *r,
504 int mask,
505 int mc,
506 GLuint arbneg)
507 {
508 GLuint tmp;
509 GLuint wmask = 0;
510
511 if (!REG_GET_VALID(*r))
512 *r = get_temp_reg(rp);
513
514 /* A partial match, VSWZ/mask define what parts of the
515 * desired swizzle we match
516 */
517 if (mc + s_mask[mask].count == 3) {
518 wmask = WRITEMASK_W;
519 src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT;
520 }
521
522 tmp = arbneg & s_mask[mask].mask;
523 if (tmp) {
524 tmp = tmp ^ s_mask[mask].mask;
525 if (tmp) {
526 emit_arith(rp,
527 PFS_OP_MAD,
528 *r,
529 arbneg & s_mask[mask].mask,
530 keep(src) | REG_NEGV_MASK,
531 pfs_one,
532 pfs_zero,
533 0);
534 if (!wmask) {
535 REG_SET_NO_USE(src, GL_TRUE);
536 } else {
537 REG_SET_NO_USE(src, GL_FALSE);
538 }
539 emit_arith(rp,
540 PFS_OP_MAD,
541 *r,
542 tmp | wmask,
543 src,
544 pfs_one,
545 pfs_zero,
546 0);
547 } else {
548 if (!wmask) {
549 REG_SET_NO_USE(src, GL_TRUE);
550 } else {
551 REG_SET_NO_USE(src, GL_FALSE);
552 }
553 emit_arith(rp,
554 PFS_OP_MAD,
555 *r,
556 (arbneg & s_mask[mask].mask) | wmask,
557 src | REG_NEGV_MASK,
558 pfs_one,
559 pfs_zero,
560 0);
561 }
562 } else {
563 if (!wmask) {
564 REG_SET_NO_USE(src, GL_TRUE);
565 } else {
566 REG_SET_NO_USE(src, GL_FALSE);
567 }
568 emit_arith(rp, PFS_OP_MAD,
569 *r,
570 s_mask[mask].mask | wmask,
571 src,
572 pfs_one,
573 pfs_zero,
574 0);
575 }
576
577 return s_mask[mask].count;
578 }
579
580 static GLuint do_swizzle(struct r300_fragment_program *rp,
581 GLuint src,
582 GLuint arbswz,
583 GLuint arbneg)
584 {
585 GLuint r = undef;
586 GLuint vswz;
587 int c_mask = 0;
588 int v_match = 0;
589
590 /* If swizzling from something without an XYZW native swizzle,
591 * emit result to a temp, and do new swizzle from the temp.
592 */
593 if (REG_GET_VSWZ(src) != SWIZZLE_XYZ ||
594 REG_GET_SSWZ(src) != SWIZZLE_W) {
595 GLuint temp = get_temp_reg(rp);
596 emit_arith(rp,
597 PFS_OP_MAD,
598 temp,
599 WRITEMASK_XYZW,
600 src,
601 pfs_one,
602 pfs_zero,
603 0);
604 src = temp;
605 }
606
607 /* set scalar swizzling */
608 REG_SET_SSWZ(src, GET_SWZ(arbswz, 3));
609
610 do {
611 vswz = REG_GET_VSWZ(src);
612 do {
613 int chash;
614
615 REG_SET_VSWZ(src, vswz);
616 chash = v_swiz[REG_GET_VSWZ(src)].hash &
617 s_mask[c_mask].hash;
618
619 if (chash == (arbswz & s_mask[c_mask].hash)) {
620 if (s_mask[c_mask].count == 3) {
621 v_match += swz_native(rp,
622 src,
623 &r,
624 arbneg);
625 } else {
626 v_match += swz_emit_partial(rp,
627 src,
628 &r,
629 c_mask,
630 v_match,
631 arbneg);
632 }
633
634 if (v_match == 3)
635 return r;
636
637 /* Fill with something invalid.. all 0's was
638 * wrong before, matched SWIZZLE_X. So all
639 * 1's will be okay for now
640 */
641 arbswz |= (PFS_INVAL & s_mask[c_mask].hash);
642 }
643 } while(v_swiz[++vswz].hash != PFS_INVAL);
644 REG_SET_VSWZ(src, SWIZZLE_XYZ);
645 } while (s_mask[++c_mask].hash != PFS_INVAL);
646
647 ERROR("should NEVER get here\n");
648 return r;
649 }
650
651 static GLuint t_src(struct r300_fragment_program *rp,
652 struct prog_src_register fpsrc)
653 {
654 GLuint r = undef;
655
656 switch (fpsrc.File) {
657 case PROGRAM_TEMPORARY:
658 REG_SET_INDEX(r, fpsrc.Index);
659 REG_SET_VALID(r, GL_TRUE);
660 REG_SET_TYPE(r, REG_TYPE_TEMP);
661 break;
662 case PROGRAM_INPUT:
663 REG_SET_INDEX(r, fpsrc.Index);
664 REG_SET_VALID(r, GL_TRUE);
665 REG_SET_TYPE(r, REG_TYPE_INPUT);
666 break;
667 case PROGRAM_LOCAL_PARAM:
668 r = emit_param4fv(rp,
669 rp->mesa_program.Base.LocalParams[fpsrc.Index]);
670 break;
671 case PROGRAM_ENV_PARAM:
672 r = emit_param4fv(rp,
673 rp->ctx->FragmentProgram.Parameters[fpsrc.Index]);
674 break;
675 case PROGRAM_STATE_VAR:
676 case PROGRAM_NAMED_PARAM:
677 r = emit_param4fv(rp,
678 rp->mesa_program.Base.Parameters->ParameterValues[fpsrc.Index]);
679 break;
680 default:
681 ERROR("unknown SrcReg->File %x\n", fpsrc.File);
682 return r;
683 }
684
685 /* no point swizzling ONE/ZERO/HALF constants... */
686 if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO)
687 r = do_swizzle(rp, r, fpsrc.Swizzle, fpsrc.NegateBase);
688 return r;
689 }
690
691 static GLuint t_scalar_src(struct r300_fragment_program *rp,
692 struct prog_src_register fpsrc)
693 {
694 struct prog_src_register src = fpsrc;
695 int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */
696
697 src.Swizzle = ((sc<<0)|(sc<<3)|(sc<<6)|(sc<<9));
698
699 return t_src(rp, src);
700 }
701
702 static GLuint t_dst(struct r300_fragment_program *rp,
703 struct prog_dst_register dest)
704 {
705 GLuint r = undef;
706
707 switch (dest.File) {
708 case PROGRAM_TEMPORARY:
709 REG_SET_INDEX(r, dest.Index);
710 REG_SET_VALID(r, GL_TRUE);
711 REG_SET_TYPE(r, REG_TYPE_TEMP);
712 return r;
713 case PROGRAM_OUTPUT:
714 REG_SET_TYPE(r, REG_TYPE_OUTPUT);
715 switch (dest.Index) {
716 case FRAG_RESULT_COLR:
717 case FRAG_RESULT_DEPR:
718 REG_SET_INDEX(r, dest.Index);
719 REG_SET_VALID(r, GL_TRUE);
720 return r;
721 default:
722 ERROR("Bad DstReg->Index 0x%x\n", dest.Index);
723 return r;
724 }
725 default:
726 ERROR("Bad DstReg->File 0x%x\n", dest.File);
727 return r;
728 }
729 }
730
731 static int t_hw_src(struct r300_fragment_program *rp,
732 GLuint src,
733 GLboolean tex)
734 {
735 COMPILE_STATE;
736 int idx;
737 int index = REG_GET_INDEX(src);
738
739 switch(REG_GET_TYPE(src)) {
740 case REG_TYPE_TEMP:
741 /* NOTE: if reg==-1 here, a source is being read that
742 * hasn't been written to. Undefined results
743 */
744 if (cs->temps[index].reg == -1)
745 cs->temps[index].reg = get_hw_temp(rp);
746
747 idx = cs->temps[index].reg;
748
749 if (!REG_GET_NO_USE(src) &&
750 (--cs->temps[index].refcount == 0))
751 free_temp(rp, src);
752 break;
753 case REG_TYPE_INPUT:
754 idx = cs->inputs[index].reg;
755
756 if (!REG_GET_NO_USE(src) &&
757 (--cs->inputs[index].refcount == 0))
758 free_hw_temp(rp, cs->inputs[index].reg);
759 break;
760 case REG_TYPE_CONST:
761 return (index | SRC_CONST);
762 default:
763 ERROR("Invalid type for source reg\n");
764 return (0 | SRC_CONST);
765 }
766
767 if (!tex)
768 cs->used_in_node |= (1 << idx);
769
770 return idx;
771 }
772
773 static int t_hw_dst(struct r300_fragment_program *rp,
774 GLuint dest,
775 GLboolean tex)
776 {
777 COMPILE_STATE;
778 int idx;
779 GLuint index = REG_GET_INDEX(dest);
780 assert(REG_GET_VALID(dest));
781
782 switch(REG_GET_TYPE(dest)) {
783 case REG_TYPE_TEMP:
784 if (cs->temps[REG_GET_INDEX(dest)].reg == -1) {
785 if (!tex) {
786 cs->temps[index].reg = get_hw_temp(rp);
787 } else {
788 cs->temps[index].reg = get_hw_temp_tex(rp);
789 }
790 }
791 idx = cs->temps[index].reg;
792
793 if (!REG_GET_NO_USE(dest) &&
794 (--cs->temps[index].refcount == 0))
795 free_temp(rp, dest);
796
797 cs->dest_in_node |= (1 << idx);
798 cs->used_in_node |= (1 << idx);
799 break;
800 case REG_TYPE_OUTPUT:
801 switch(index) {
802 case FRAG_RESULT_COLR:
803 rp->node[rp->cur_node].flags |= R300_PFS_NODE_OUTPUT_COLOR;
804 break;
805 case FRAG_RESULT_DEPR:
806 rp->node[rp->cur_node].flags |= R300_PFS_NODE_OUTPUT_DEPTH;
807 break;
808 }
809 return index;
810 break;
811 default:
812 ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest));
813 return 0;
814 }
815
816 return idx;
817 }
818
819 static void emit_nop(struct r300_fragment_program *rp,
820 GLuint mask,
821 GLboolean sync)
822 {
823 COMPILE_STATE;
824
825 if (sync)
826 cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);
827
828 if (mask & WRITEMASK_XYZ) {
829 rp->alu.inst[cs->v_pos].inst0 = NOP_INST0;
830 rp->alu.inst[cs->v_pos].inst1 = NOP_INST1;
831 cs->v_pos++;
832 }
833
834 if (mask & WRITEMASK_W) {
835 rp->alu.inst[cs->s_pos].inst2 = NOP_INST2;
836 rp->alu.inst[cs->s_pos].inst3 = NOP_INST3;
837 cs->s_pos++;
838 }
839 }
840
841 static void emit_tex(struct r300_fragment_program *rp,
842 struct prog_instruction *fpi,
843 int opcode)
844 {
845 COMPILE_STATE;
846 GLuint coord = t_src(rp, fpi->SrcReg[0]);
847 GLuint dest = undef, rdest = undef;
848 GLuint din = cs->dest_in_node, uin = cs->used_in_node;
849 int unit = fpi->TexSrcUnit;
850 int hwsrc, hwdest;
851
852 /* Resolve source/dest to hardware registers */
853 hwsrc = t_hw_src(rp, coord, GL_TRUE);
854 if (opcode != R300_FPITX_OP_KIL) {
855 dest = t_dst(rp, fpi->DstReg);
856
857 /* r300 doesn't seem to be able to do TEX->output reg */
858 if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
859 rdest = dest;
860 dest = get_temp_reg_tex(rp);
861 }
862 hwdest = t_hw_dst(rp, dest, GL_TRUE);
863
864 /* Use a temp that hasn't been used in this node, rather
865 * than causing an indirection
866 */
867 if (uin & (1 << hwdest)) {
868 free_hw_temp(rp, hwdest);
869 hwdest = get_hw_temp_tex(rp);
870 cs->temps[REG_GET_INDEX(dest)].reg = hwdest;
871 }
872 } else {
873 hwdest = 0;
874 unit = 0;
875 }
876
877 /* Indirection if source has been written in this node, or if the
878 * dest has been read/written in this node
879 */
880 if ((REG_GET_TYPE(coord) != REG_TYPE_CONST &&
881 (din & (1<<hwsrc))) || (uin & (1<<hwdest))) {
882
883 /* Finish off current node */
884 cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);
885 if (rp->node[rp->cur_node].alu_offset == cs->v_pos) {
886 /* No alu instructions in the node? Emit a NOP. */
887 emit_nop(rp, WRITEMASK_XYZW, GL_TRUE);
888 cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);
889 }
890
891 rp->node[rp->cur_node].alu_end =
892 cs->v_pos - rp->node[rp->cur_node].alu_offset - 1;
893 assert(rp->node[rp->cur_node].alu_end >= 0);
894
895 if (++rp->cur_node >= PFS_MAX_TEX_INDIRECT) {
896 ERROR("too many levels of texture indirection\n");
897 return;
898 }
899
900 /* Start new node */
901 rp->node[rp->cur_node].tex_offset = rp->tex.length;
902 rp->node[rp->cur_node].alu_offset = cs->v_pos;
903 rp->node[rp->cur_node].tex_end = -1;
904 rp->node[rp->cur_node].alu_end = -1;
905 rp->node[rp->cur_node].flags = 0;
906 cs->used_in_node = 0;
907 cs->dest_in_node = 0;
908 }
909
910 if (rp->cur_node == 0)
911 rp->first_node_has_tex = 1;
912
913 rp->tex.inst[rp->tex.length++] = 0
914 | (hwsrc << R300_FPITX_SRC_SHIFT)
915 | (hwdest << R300_FPITX_DST_SHIFT)
916 | (unit << R300_FPITX_IMAGE_SHIFT)
917 /* not entirely sure about this */
918 | (opcode << R300_FPITX_OPCODE_SHIFT);
919
920 cs->dest_in_node |= (1 << hwdest);
921 if (REG_GET_TYPE(coord) != REG_TYPE_CONST)
922 cs->used_in_node |= (1 << hwsrc);
923
924 rp->node[rp->cur_node].tex_end++;
925
926 /* Copy from temp to output if needed */
927 if (REG_GET_VALID(rdest)) {
928 emit_arith(rp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest,
929 pfs_one, pfs_zero, 0);
930 free_temp(rp, dest);
931 }
932 }
933
934 /* Add sources to FPI1/FPI3 lists. If source is already on list,
935 * reuse the index instead of wasting a source.
936 */
937 static int add_src(struct r300_fragment_program *rp,
938 int reg,
939 int pos,
940 int srcmask)
941 {
942 COMPILE_STATE;
943 int csm, i;
944
945 /* Look for matches */
946 for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) {
947 /* If sources have been allocated in this position(s)... */
948 if ((cs->slot[pos].umask & csm) == csm) {
949 /* ... and the register number(s) match, re-use the
950 source */
951 if (srcmask == SLOT_VECTOR &&
952 cs->slot[pos].vsrc[i] == reg)
953 return i;
954 if (srcmask == SLOT_SCALAR &&
955 cs->slot[pos].ssrc[i] == reg)
956 return i;
957 if (srcmask == SLOT_BOTH &&
958 cs->slot[pos].vsrc[i] == reg &&
959 cs->slot[pos].ssrc[i] == reg)
960 return i;
961 }
962 }
963
964 /* Look for free spaces */
965 for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) {
966 /* If the position(s) haven't been allocated */
967 if ((cs->slot[pos].umask & csm) == 0) {
968 cs->slot[pos].umask |= csm;
969
970 if (srcmask & SLOT_VECTOR)
971 cs->slot[pos].vsrc[i] = reg;
972 if (srcmask & SLOT_SCALAR)
973 cs->slot[pos].ssrc[i] = reg;
974 return i;
975 }
976 }
977
978 //ERROR("Failed to allocate sources in FPI1/FPI3!\n");
979 return 0;
980 }
981
982 /* Determine whether or not to position opcode in the same ALU slot for both
983 * vector and scalar portions of an instruction.
984 *
985 * It's not necessary to force the first case, but it makes disassembled
986 * shaders easier to read.
987 */
988 static GLboolean force_same_slot(int vop,
989 int sop,
990 GLboolean emit_vop,
991 GLboolean emit_sop,
992 int argc,
993 GLuint *src)
994 {
995 int i;
996
997 if (emit_vop && emit_sop)
998 return GL_TRUE;
999
1000 if (emit_vop && vop == R300_FPI0_OUTC_REPL_ALPHA)
1001 return GL_TRUE;
1002
1003 if (emit_vop) {
1004 for (i=0;i<argc;i++)
1005 if (REG_GET_VSWZ(src[i]) == SWIZZLE_WZY)
1006 return GL_TRUE;
1007 }
1008
1009 return GL_FALSE;
1010 }
1011
1012 static void emit_arith(struct r300_fragment_program *rp,
1013 int op,
1014 GLuint dest,
1015 int mask,
1016 GLuint src0,
1017 GLuint src1,
1018 GLuint src2,
1019 int flags)
1020 {
1021 COMPILE_STATE;
1022 GLuint src[3] = { src0, src1, src2 };
1023 int hwsrc[3], sswz[3], vswz[3];
1024 int hwdest;
1025 GLboolean emit_vop = GL_FALSE, emit_sop = GL_FALSE;
1026 int vop, sop, argc;
1027 int vpos, spos;
1028 int i;
1029
1030 vop = r300_fpop[op].v_op;
1031 sop = r300_fpop[op].s_op;
1032 argc = r300_fpop[op].argc;
1033
1034 if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3)
1035 emit_vop = GL_TRUE;
1036 if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA)
1037 emit_sop = GL_TRUE;
1038
1039 if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT &&
1040 REG_GET_INDEX(dest) == FRAG_RESULT_DEPR)
1041 emit_vop = GL_FALSE;
1042
1043 if (force_same_slot(vop, sop, emit_vop, emit_sop, argc, src)) {
1044 vpos = spos = MAX2(cs->v_pos, cs->s_pos);
1045 } else {
1046 vpos = cs->v_pos;
1047 spos = cs->s_pos;
1048 /* Here is where we'd decide on where a safe place is to
1049 * combine this instruction with a previous one.
1050 *
1051 * This is extremely simple for now.. if a source depends
1052 * on the opposite stream, force the same instruction.
1053 */
1054 for (i=0;i<3;i++) {
1055 if (emit_vop &&
1056 (v_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_SCALAR)) {
1057 vpos = spos = MAX2(vpos, spos);
1058 break;
1059 }
1060 if (emit_sop &&
1061 (s_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_VECTOR)) {
1062 vpos = spos = MAX2(vpos, spos);
1063 break;
1064 }
1065 }
1066 }
1067
1068 /* - Convert src->hwsrc, record for FPI1/FPI3
1069 * - Determine ARG parts of FPI0/FPI2, unused args are filled
1070 * with ARG_ZERO.
1071 */
1072 for (i=0;i<3;i++) {
1073 int srcpos;
1074
1075 if (i >= argc) {
1076 vswz[i] = R300_FPI0_ARGC_ZERO;
1077 sswz[i] = R300_FPI2_ARGA_ZERO;
1078 continue;
1079 }
1080
1081 hwsrc[i] = t_hw_src(rp, src[i], GL_FALSE);
1082
1083 if (emit_vop && vop != R300_FPI0_OUTC_REPL_ALPHA) {
1084 srcpos = add_src(rp, hwsrc[i], vpos,
1085 v_swiz[REG_GET_VSWZ(src[i])].flags);
1086 vswz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base +
1087 (srcpos *
1088 v_swiz[REG_GET_VSWZ(src[i])].stride)) |
1089 ((src[i] & REG_NEGV_MASK) ? ARG_NEG : 0) |
1090 ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0);
1091 } else vswz[i] = R300_FPI0_ARGC_ZERO;
1092
1093 if (emit_sop) {
1094 srcpos = add_src(rp, hwsrc[i], spos,
1095 s_swiz[REG_GET_SSWZ(src[i])].flags);
1096 sswz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base +
1097 (srcpos *
1098 s_swiz[REG_GET_SSWZ(src[i])].stride)) |
1099 ((src[i] & REG_NEGS_MASK) ? ARG_NEG : 0) |
1100 ((src[i] & REG_ABS_MASK) ? ARG_ABS : 0);
1101 } else sswz[i] = R300_FPI2_ARGA_ZERO;
1102 }
1103 hwdest = t_hw_dst(rp, dest, GL_FALSE);
1104
1105 if (flags & PFS_FLAG_SAT) {
1106 vop |= R300_FPI0_OUTC_SAT;
1107 sop |= R300_FPI2_OUTA_SAT;
1108 }
1109
1110 /* Throw the pieces together and get FPI0/1 */
1111 rp->alu.inst[vpos].inst1 =
1112 ((cs->slot[vpos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) |
1113 (cs->slot[vpos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) |
1114 (cs->slot[vpos].vsrc[2] << R300_FPI1_SRC2C_SHIFT));
1115 if (emit_vop) {
1116 rp->alu.inst[vpos].inst0 = vop |
1117 (vswz[0] << R300_FPI0_ARG0C_SHIFT) |
1118 (vswz[1] << R300_FPI0_ARG1C_SHIFT) |
1119 (vswz[2] << R300_FPI0_ARG2C_SHIFT);
1120
1121 rp->alu.inst[vpos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT;
1122 if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
1123 if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
1124 rp->alu.inst[vpos].inst1 |=
1125 (mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT;
1126 } else assert(0);
1127 } else {
1128 rp->alu.inst[vpos].inst1 |=
1129 (mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_REG_MASK_SHIFT;
1130 }
1131 cs->v_pos = vpos+1;
1132 } else if (spos >= vpos)
1133 rp->alu.inst[spos].inst0 = NOP_INST0;
1134
1135 /* And now FPI2/3 */
1136 rp->alu.inst[spos].inst3 =
1137 ((cs->slot[spos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) |
1138 (cs->slot[spos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) |
1139 (cs->slot[spos].ssrc[2] << R300_FPI3_SRC2A_SHIFT));
1140 if (emit_sop) {
1141 rp->alu.inst[spos].inst2 = sop |
1142 sswz[0] << R300_FPI2_ARG0A_SHIFT |
1143 sswz[1] << R300_FPI2_ARG1A_SHIFT |
1144 sswz[2] << R300_FPI2_ARG2A_SHIFT;
1145
1146 if (mask & WRITEMASK_W) {
1147 if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
1148 if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) {
1149 rp->alu.inst[spos].inst3 |=
1150 (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_OUTPUT;
1151 } else if (REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) {
1152 rp->alu.inst[spos].inst3 |= R300_FPI3_DSTA_DEPTH;
1153 } else assert(0);
1154 } else {
1155 rp->alu.inst[spos].inst3 |=
1156 (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_REG;
1157 }
1158 }
1159 cs->s_pos = spos+1;
1160 } else if (vpos >= spos)
1161 rp->alu.inst[vpos].inst2 = NOP_INST2;
1162
1163 return;
1164 }
1165
/* Compiled out. Would build an INPUT descriptor for fragment attribute
 * `attr`, flagging an error and returning `undef` when the program's
 * InputsRead mask does not include that attribute. */
1166 #if 0
1167 static GLuint get_attrib(struct r300_fragment_program *rp, GLuint attr)
1168 {
1169 struct gl_fragment_program *mp = &rp->mesa_program;
1170 GLuint r = undef;
1171
1172 if (!(mp->Base.InputsRead & (1<<attr))) {
1173 ERROR("Attribute %d was not provided!\n", attr);
1174 return undef;
1175 }
1176
1177 REG_SET_TYPE(r, REG_TYPE_INPUT);
1178 REG_SET_INDEX(r, attr);
1179 REG_SET_VALID(r, GL_TRUE);
1180 return r;
1181 }
1182 #endif
1183
1184 static GLboolean parse_program(struct r300_fragment_program *rp)
1185 {
1186 struct gl_fragment_program *mp = &rp->mesa_program;
1187 const struct prog_instruction *inst = mp->Base.Instructions;
1188 struct prog_instruction *fpi;
1189 GLuint src[3], dest, temp;
1190 GLuint cnst;
1191 int flags, mask = 0;
1192 GLfloat cnstv[4] = {0.0, 0.0, 0.0, 0.0};
1193
1194 if (!inst || inst[0].Opcode == OPCODE_END) {
1195 ERROR("empty program?\n");
1196 return GL_FALSE;
1197 }
1198
1199 for (fpi=mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
1200 if (fpi->SaturateMode == SATURATE_ZERO_ONE)
1201 flags = PFS_FLAG_SAT;
1202 else
1203 flags = 0;
1204
1205 if (fpi->Opcode != OPCODE_KIL) {
1206 dest = t_dst(rp, fpi->DstReg);
1207 mask = fpi->DstReg.WriteMask;
1208 }
1209
1210 switch (fpi->Opcode) {
1211 case OPCODE_ABS:
1212 src[0] = t_src(rp, fpi->SrcReg[0]);
1213 emit_arith(rp, PFS_OP_MAD, dest, mask,
1214 absolute(src[0]), pfs_one, pfs_zero,
1215 flags);
1216 break;
1217 case OPCODE_ADD:
1218 src[0] = t_src(rp, fpi->SrcReg[0]);
1219 src[1] = t_src(rp, fpi->SrcReg[1]);
1220 emit_arith(rp, PFS_OP_MAD, dest, mask,
1221 src[0], pfs_one, src[1],
1222 flags);
1223 break;
1224 case OPCODE_CMP:
1225 src[0] = t_src(rp, fpi->SrcReg[0]);
1226 src[1] = t_src(rp, fpi->SrcReg[1]);
1227 src[2] = t_src(rp, fpi->SrcReg[2]);
1228 /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c
1229 * r300 - if src2.c < 0.0 ? src1.c : src0.c
1230 */
1231 emit_arith(rp, PFS_OP_CMP, dest, mask,
1232 src[2], src[1], src[0],
1233 flags);
1234 break;
1235 case OPCODE_COS:
1236 /*
1237 * cos using taylor serie:
1238 * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6!
1239 */
1240 temp = get_temp_reg(rp);
1241 cnstv[0] = 0.5;
1242 cnstv[1] = 0.041666667;
1243 cnstv[2] = 0.001388889;
1244 cnstv[4] = 0.0;
1245 cnst = emit_const4fv(rp, cnstv);
1246 src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
1247
1248 emit_arith(rp, PFS_OP_MAD, temp,
1249 WRITEMASK_XYZ,
1250 src[0],
1251 src[0],
1252 pfs_zero,
1253 flags);
1254 emit_arith(rp, PFS_OP_MAD, temp,
1255 WRITEMASK_Y | WRITEMASK_Z,
1256 temp, temp,
1257 pfs_zero,
1258 flags);
1259 emit_arith(rp, PFS_OP_MAD, temp,
1260 WRITEMASK_Z,
1261 temp,
1262 swizzle(temp, X, X, X, W),
1263 pfs_zero,
1264 flags);
1265 emit_arith(rp, PFS_OP_MAD, temp,
1266 WRITEMASK_XYZ,
1267 temp, cnst,
1268 pfs_zero,
1269 flags);
1270 emit_arith(rp, PFS_OP_MAD, temp,
1271 WRITEMASK_X,
1272 pfs_one,
1273 pfs_one,
1274 negate(temp),
1275 flags);
1276 emit_arith(rp, PFS_OP_MAD, temp,
1277 WRITEMASK_X,
1278 temp,
1279 pfs_one,
1280 swizzle(temp, Y, Y, Y, W),
1281 flags);
1282 emit_arith(rp, PFS_OP_MAD, temp,
1283 WRITEMASK_X,
1284 temp,
1285 pfs_one,
1286 negate(swizzle(temp, Z, Z, Z, W)),
1287 flags);
1288 emit_arith(rp, PFS_OP_MAD, dest, mask,
1289 swizzle(temp, X, X, X, X),
1290 pfs_one,
1291 pfs_zero,
1292 flags);
1293 free_temp(rp, temp);
1294 break;
1295 case OPCODE_DP3:
1296 src[0] = t_src(rp, fpi->SrcReg[0]);
1297 src[1] = t_src(rp, fpi->SrcReg[1]);
1298 emit_arith(rp, PFS_OP_DP3, dest, mask,
1299 src[0], src[1], undef,
1300 flags);
1301 break;
1302 case OPCODE_DP4:
1303 src[0] = t_src(rp, fpi->SrcReg[0]);
1304 src[1] = t_src(rp, fpi->SrcReg[1]);
1305 emit_arith(rp, PFS_OP_DP4, dest, mask,
1306 src[0], src[1], undef,
1307 flags);
1308 break;
1309 case OPCODE_DPH:
1310 src[0] = t_src(rp, fpi->SrcReg[0]);
1311 src[1] = t_src(rp, fpi->SrcReg[1]);
1312 /* src0.xyz1 -> temp
1313 * DP4 dest, temp, src1
1314 */
1315 #if 0
1316 temp = get_temp_reg(rp);
1317 src[0].s_swz = SWIZZLE_ONE;
1318 emit_arith(rp, PFS_OP_MAD, temp, mask,
1319 src[0], pfs_one, pfs_zero,
1320 0);
1321 emit_arith(rp, PFS_OP_DP4, dest, mask,
1322 temp, src[1], undef,
1323 flags);
1324 free_temp(rp, temp);
1325 #else
1326 emit_arith(rp, PFS_OP_DP4, dest, mask,
1327 swizzle(src[0], X, Y, Z, ONE), src[1],
1328 undef, flags);
1329 #endif
1330 break;
1331 case OPCODE_DST:
1332 src[0] = t_src(rp, fpi->SrcReg[0]);
1333 src[1] = t_src(rp, fpi->SrcReg[1]);
1334 /* dest.y = src0.y * src1.y */
1335 if (mask & WRITEMASK_Y)
1336 emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y,
1337 keep(src[0]), keep(src[1]),
1338 pfs_zero, flags);
1339 /* dest.z = src0.z */
1340 if (mask & WRITEMASK_Z)
1341 emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Z,
1342 src[0], pfs_one, pfs_zero, flags);
1343 /* result.x = 1.0
1344 * result.w = src1.w */
1345 if (mask & WRITEMASK_XW) {
1346 REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat*/
1347 emit_arith(rp, PFS_OP_MAD, dest,
1348 mask & WRITEMASK_XW,
1349 src[1], pfs_one, pfs_zero,
1350 flags);
1351 }
1352 break;
1353 case OPCODE_EX2:
1354 src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
1355 emit_arith(rp, PFS_OP_EX2, dest, mask,
1356 src[0], undef, undef,
1357 flags);
1358 break;
1359 case OPCODE_FLR:
1360 src[0] = t_src(rp, fpi->SrcReg[0]);
1361 temp = get_temp_reg(rp);
1362 /* FRC temp, src0
1363 * MAD dest, src0, 1.0, -temp
1364 */
1365 emit_arith(rp, PFS_OP_FRC, temp, mask,
1366 keep(src[0]), undef, undef,
1367 0);
1368 emit_arith(rp, PFS_OP_MAD, dest, mask,
1369 src[0], pfs_one, negate(temp),
1370 flags);
1371 free_temp(rp, temp);
1372 break;
1373 case OPCODE_FRC:
1374 src[0] = t_src(rp, fpi->SrcReg[0]);
1375 emit_arith(rp, PFS_OP_FRC, dest, mask,
1376 src[0], undef, undef,
1377 flags);
1378 break;
1379 case OPCODE_KIL:
1380 emit_tex(rp, fpi, R300_FPITX_OP_KIL);
1381 break;
1382 case OPCODE_LG2:
1383 src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
1384 emit_arith(rp, PFS_OP_LG2, dest, mask,
1385 src[0], undef, undef,
1386 flags);
1387 break;
1388 case OPCODE_LIT:
1389 /* LIT
1390 * if (s.x < 0) t.x = 0; else t.x = s.x;
1391 * if (s.y < 0) t.y = 0; else t.y = s.y;
1392 * if (s.w > 128.0) t.w = 128.0; else t.w = s.w;
1393 * if (s.w < -128.0) t.w = -128.0; else t.w = s.w;
1394 * r.x = 1.0
1395 * if (t.x > 0) r.y = pow(t.y, t.w); else r.y = 0;
1396 * Also r.y = 0 if t.y < 0
1397 * For the t.x > 0 FGLRX use the CMPH opcode which
1398 * change the compare to (t.x + 0.5) > 0.5 we may
1399 * save one instruction by doing CMP -t.x
1400 */
1401 cnstv[0] = cnstv[1] = cnstv[2] = cnstv[4] = 0.50001;
1402 src[0] = t_src(rp, fpi->SrcReg[0]);
1403 temp = get_temp_reg(rp);
1404 cnst = emit_const4fv(rp, cnstv);
1405 emit_arith(rp, PFS_OP_CMP, temp,
1406 WRITEMASK_X | WRITEMASK_Y,
1407 src[0], pfs_zero, src[0], flags);
1408 emit_arith(rp, PFS_OP_MIN, temp, WRITEMASK_Z,
1409 swizzle(keep(src[0]), W, W, W, W),
1410 cnst, undef, flags);
1411 emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W,
1412 swizzle(temp, Y, Y, Y, Y),
1413 undef, undef, flags);
1414 emit_arith(rp, PFS_OP_MAX, temp, WRITEMASK_Z,
1415 temp, negate(cnst), undef, flags);
1416 emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
1417 temp, swizzle(temp, Z, Z, Z, Z),
1418 pfs_zero, flags);
1419 emit_arith(rp, PFS_OP_EX2, temp, WRITEMASK_W,
1420 temp, undef, undef, flags);
1421 emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y,
1422 swizzle(keep(temp), X, X, X, X),
1423 pfs_one, pfs_zero, flags);
1424 #if 0
1425 emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
1426 temp, pfs_one, pfs_half, flags);
1427 emit_arith(rp, PFS_OP_CMPH, temp, WRITEMASK_Z,
1428 swizzle(keep(temp), W, W, W, W),
1429 pfs_zero, swizzle(keep(temp), X, X, X, X),
1430 flags);
1431 #else
1432 emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
1433 pfs_zero,
1434 swizzle(keep(temp), W, W, W, W),
1435 negate(swizzle(keep(temp), X, X, X, X)),
1436 flags);
1437 #endif
1438 emit_arith(rp, PFS_OP_CMP, dest, WRITEMASK_Z,
1439 pfs_zero, temp,
1440 negate(swizzle(keep(temp), Y, Y, Y, Y)),
1441 flags);
1442 emit_arith(rp, PFS_OP_MAD, dest,
1443 WRITEMASK_X | WRITEMASK_W,
1444 pfs_one,
1445 pfs_one,
1446 pfs_zero,
1447 flags);
1448 free_temp(rp, temp);
1449 break;
1450 case OPCODE_LRP:
1451 src[0] = t_src(rp, fpi->SrcReg[0]);
1452 src[1] = t_src(rp, fpi->SrcReg[1]);
1453 src[2] = t_src(rp, fpi->SrcReg[2]);
1454 /* result = tmp0tmp1 + (1 - tmp0)tmp2
1455 * = tmp0tmp1 + tmp2 + (-tmp0)tmp2
1456 * MAD temp, -tmp0, tmp2, tmp2
1457 * MAD result, tmp0, tmp1, temp
1458 */
1459 temp = get_temp_reg(rp);
1460 emit_arith(rp, PFS_OP_MAD, temp, mask,
1461 negate(keep(src[0])), keep(src[2]), src[2],
1462 0);
1463 emit_arith(rp, PFS_OP_MAD, dest, mask,
1464 src[0], src[1], temp,
1465 flags);
1466 free_temp(rp, temp);
1467 break;
1468 case OPCODE_MAD:
1469 src[0] = t_src(rp, fpi->SrcReg[0]);
1470 src[1] = t_src(rp, fpi->SrcReg[1]);
1471 src[2] = t_src(rp, fpi->SrcReg[2]);
1472 emit_arith(rp, PFS_OP_MAD, dest, mask,
1473 src[0], src[1], src[2],
1474 flags);
1475 break;
1476 case OPCODE_MAX:
1477 src[0] = t_src(rp, fpi->SrcReg[0]);
1478 src[1] = t_src(rp, fpi->SrcReg[1]);
1479 emit_arith(rp, PFS_OP_MAX, dest, mask,
1480 src[0], src[1], undef,
1481 flags);
1482 break;
1483 case OPCODE_MIN:
1484 src[0] = t_src(rp, fpi->SrcReg[0]);
1485 src[1] = t_src(rp, fpi->SrcReg[1]);
1486 emit_arith(rp, PFS_OP_MIN, dest, mask,
1487 src[0], src[1], undef,
1488 flags);
1489 break;
1490 case OPCODE_MOV:
1491 case OPCODE_SWZ:
1492 src[0] = t_src(rp, fpi->SrcReg[0]);
1493 emit_arith(rp, PFS_OP_MAD, dest, mask,
1494 src[0], pfs_one, pfs_zero,
1495 flags);
1496 break;
1497 case OPCODE_MUL:
1498 src[0] = t_src(rp, fpi->SrcReg[0]);
1499 src[1] = t_src(rp, fpi->SrcReg[1]);
1500 emit_arith(rp, PFS_OP_MAD, dest, mask,
1501 src[0], src[1], pfs_zero,
1502 flags);
1503 break;
1504 case OPCODE_POW:
1505 src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
1506 src[1] = t_scalar_src(rp, fpi->SrcReg[1]);
1507 temp = get_temp_reg(rp);
1508 emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W,
1509 src[0], undef, undef,
1510 0);
1511 emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
1512 temp, src[1], pfs_zero,
1513 0);
1514 emit_arith(rp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask,
1515 temp, undef, undef,
1516 0);
1517 free_temp(rp, temp);
1518 break;
1519 case OPCODE_RCP:
1520 src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
1521 emit_arith(rp, PFS_OP_RCP, dest, mask,
1522 src[0], undef, undef,
1523 flags);
1524 break;
1525 case OPCODE_RSQ:
1526 src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
1527 emit_arith(rp, PFS_OP_RSQ, dest, mask,
1528 absolute(src[0]), pfs_zero, pfs_zero,
1529 flags);
1530 break;
1531 case OPCODE_SCS:
1532 ERROR("SCS not implemented\n");
1533 break;
1534 case OPCODE_SGE:
1535 src[0] = t_src(rp, fpi->SrcReg[0]);
1536 src[1] = t_src(rp, fpi->SrcReg[1]);
1537 temp = get_temp_reg(rp);
1538 /* temp = src0 - src1
1539 * dest.c = (temp.c < 0.0) ? 0 : 1
1540 */
1541 emit_arith(rp, PFS_OP_MAD, temp, mask,
1542 src[0], pfs_one, negate(src[1]),
1543 0);
1544 emit_arith(rp, PFS_OP_CMP, dest, mask,
1545 pfs_one, pfs_zero, temp,
1546 0);
1547 free_temp(rp, temp);
1548 break;
1549 case OPCODE_SIN:
1550 /*
1551 * sin using taylor serie:
1552 * sin(x) = x - x^3/3! + x^5/5! - x^7/7!
1553 */
1554 temp = get_temp_reg(rp);
1555 cnstv[0] = 0.333333333;
1556 cnstv[1] = 0.008333333;
1557 cnstv[2] = 0.000198413;
1558 cnstv[4] = 0.0;
1559 cnst = emit_const4fv(rp, cnstv);
1560 src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
1561
1562 emit_arith(rp, PFS_OP_MAD, temp,
1563 WRITEMASK_XYZ,
1564 src[0],
1565 src[0],
1566 pfs_zero,
1567 flags);
1568 emit_arith(rp, PFS_OP_MAD, temp,
1569 WRITEMASK_Y | WRITEMASK_Z,
1570 temp, temp,
1571 pfs_zero,
1572 flags);
1573 emit_arith(rp, PFS_OP_MAD, temp,
1574 WRITEMASK_Z,
1575 temp,
1576 swizzle(temp, X, X, X, W),
1577 pfs_zero,
1578 flags);
1579 emit_arith(rp, PFS_OP_MAD, temp,
1580 WRITEMASK_XYZ,
1581 src[0],
1582 temp,
1583 pfs_zero,
1584 flags);
1585 emit_arith(rp, PFS_OP_MAD, temp,
1586 WRITEMASK_XYZ,
1587 temp, cnst,
1588 pfs_zero,
1589 flags);
1590 emit_arith(rp, PFS_OP_MAD, temp,
1591 WRITEMASK_X,
1592 src[0],
1593 pfs_one,
1594 negate(temp),
1595 flags);
1596 emit_arith(rp, PFS_OP_MAD, temp,
1597 WRITEMASK_X,
1598 temp,
1599 pfs_one,
1600 swizzle(temp, Y, Y, Y, W),
1601 flags);
1602 emit_arith(rp, PFS_OP_MAD, temp,
1603 WRITEMASK_X,
1604 temp,
1605 pfs_one,
1606 negate(swizzle(temp, Z, Z, Z, W)),
1607 flags);
1608 emit_arith(rp, PFS_OP_MAD, dest, mask,
1609 swizzle(temp, X, X, X, X),
1610 pfs_one,
1611 pfs_zero,
1612 flags);
1613 free_temp(rp, temp);
1614 break;
1615 case OPCODE_SLT:
1616 src[0] = t_src(rp, fpi->SrcReg[0]);
1617 src[1] = t_src(rp, fpi->SrcReg[1]);
1618 temp = get_temp_reg(rp);
1619 /* temp = src0 - src1
1620 * dest.c = (temp.c < 0.0) ? 1 : 0
1621 */
1622 emit_arith(rp, PFS_OP_MAD, temp, mask,
1623 src[0], pfs_one, negate(src[1]),
1624 0);
1625 emit_arith(rp, PFS_OP_CMP, dest, mask,
1626 pfs_zero, pfs_one, temp,
1627 0);
1628 free_temp(rp, temp);
1629 break;
1630 case OPCODE_SUB:
1631 src[0] = t_src(rp, fpi->SrcReg[0]);
1632 src[1] = t_src(rp, fpi->SrcReg[1]);
1633 emit_arith(rp, PFS_OP_MAD, dest, mask,
1634 src[0], pfs_one, negate(src[1]),
1635 flags);
1636 break;
1637 case OPCODE_TEX:
1638 emit_tex(rp, fpi, R300_FPITX_OP_TEX);
1639 break;
1640 case OPCODE_TXB:
1641 emit_tex(rp, fpi, R300_FPITX_OP_TXB);
1642 break;
1643 case OPCODE_TXP:
1644 emit_tex(rp, fpi, R300_FPITX_OP_TXP);
1645 break;
1646 case OPCODE_XPD: {
1647 src[0] = t_src(rp, fpi->SrcReg[0]);
1648 src[1] = t_src(rp, fpi->SrcReg[1]);
1649 temp = get_temp_reg(rp);
1650 /* temp = src0.zxy * src1.yzx */
1651 emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZ,
1652 swizzle(keep(src[0]), Z, X, Y, W),
1653 swizzle(keep(src[1]), Y, Z, X, W),
1654 pfs_zero,
1655 0);
1656 /* dest.xyz = src0.yzx * src1.zxy - temp
1657 * dest.w = undefined
1658 * */
1659 emit_arith(rp, PFS_OP_MAD, dest, mask & WRITEMASK_XYZ,
1660 swizzle(src[0], Y, Z, X, W),
1661 swizzle(src[1], Z, X, Y, W),
1662 negate(temp),
1663 flags);
1664 /* cleanup */
1665 free_temp(rp, temp);
1666 break;
1667 }
1668 default:
1669 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
1670 break;
1671 }
1672
1673 if (rp->error)
1674 return GL_FALSE;
1675
1676 }
1677
1678 return GL_TRUE;
1679 }
1680
1681 /* - Init structures
1682 * - Determine what hwregs each input corresponds to
1683 */
1684 static void init_program(struct r300_fragment_program *rp)
1685 {
1686 struct r300_pfs_compile_state *cs = NULL;
1687 struct gl_fragment_program *mp = &rp->mesa_program;
1688 struct prog_instruction *fpi;
1689 GLuint InputsRead = mp->Base.InputsRead;
1690 GLuint temps_used = 0; /* for rp->temps[] */
1691 int i,j;
1692
1693 /* New compile, reset tracking data */
1694 rp->translated = GL_FALSE;
1695 rp->error = GL_FALSE;
1696 rp->cs = cs = &(R300_CONTEXT(rp->ctx)->state.pfs_compile);
1697 rp->tex.length = 0;
1698 rp->cur_node = 0;
1699 rp->first_node_has_tex = 0;
1700 rp->const_nr = 0;
1701 rp->param_nr = 0;
1702 rp->params_uptodate = GL_FALSE;
1703 rp->max_temp_idx = 0;
1704 rp->node[0].alu_end = -1;
1705 rp->node[0].tex_end = -1;
1706
1707 _mesa_memset(cs, 0, sizeof(*rp->cs));
1708 for (i=0;i<PFS_MAX_ALU_INST;i++) {
1709 for (j=0;j<3;j++) {
1710 cs->slot[i].vsrc[j] = SRC_CONST;
1711 cs->slot[i].ssrc[j] = SRC_CONST;
1712 }
1713 }
1714
1715 /* Work out what temps the Mesa inputs correspond to, this must match
1716 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1717 * configures itself based on the fragprog's InputsRead
1718 *
1719 * NOTE: this depends on get_hw_temp() allocating registers in order,
1720 * starting from register 0.
1721 */
1722
1723 /* Texcoords come first */
1724 for (i=0;i<rp->ctx->Const.MaxTextureUnits;i++) {
1725 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1726 cs->inputs[FRAG_ATTRIB_TEX0+i].refcount = 0;
1727 cs->inputs[FRAG_ATTRIB_TEX0+i].reg = get_hw_temp(rp);
1728 }
1729 }
1730 InputsRead &= ~FRAG_BITS_TEX_ANY;
1731
1732 /* fragment position treated as a texcoord */
1733 if (InputsRead & FRAG_BIT_WPOS) {
1734 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
1735 cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(rp);
1736 }
1737 InputsRead &= ~FRAG_BIT_WPOS;
1738
1739 /* Then primary colour */
1740 if (InputsRead & FRAG_BIT_COL0) {
1741 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
1742 cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(rp);
1743 }
1744 InputsRead &= ~FRAG_BIT_COL0;
1745
1746 /* Secondary color */
1747 if (InputsRead & FRAG_BIT_COL1) {
1748 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
1749 cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(rp);
1750 }
1751 InputsRead &= ~FRAG_BIT_COL1;
1752
1753 /* Anything else */
1754 if (InputsRead) {
1755 WARN_ONCE("Don't know how to handle inputs 0x%x\n",
1756 InputsRead);
1757 /* force read from hwreg 0 for now */
1758 for (i=0;i<32;i++)
1759 if (InputsRead & (1<<i)) cs->inputs[i].reg = 0;
1760 }
1761
1762 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
1763 * That way, we can free up the reg when it's no longer needed
1764 */
1765 if (!mp->Base.Instructions) {
1766 ERROR("No instructions found in program\n");
1767 return;
1768 }
1769
1770 for (fpi=mp->Base.Instructions;fpi->Opcode != OPCODE_END; fpi++) {
1771 int idx;
1772
1773 for (i=0;i<3;i++) {
1774 idx = fpi->SrcReg[i].Index;
1775 switch (fpi->SrcReg[i].File) {
1776 case PROGRAM_TEMPORARY:
1777 if (!(temps_used & (1<<idx))) {
1778 cs->temps[idx].reg = -1;
1779 cs->temps[idx].refcount = 1;
1780 temps_used |= (1 << idx);
1781 } else
1782 cs->temps[idx].refcount++;
1783 break;
1784 case PROGRAM_INPUT:
1785 cs->inputs[idx].refcount++;
1786 break;
1787 default: break;
1788 }
1789 }
1790
1791 idx = fpi->DstReg.Index;
1792 if (fpi->DstReg.File == PROGRAM_TEMPORARY) {
1793 if (!(temps_used & (1<<idx))) {
1794 cs->temps[idx].reg = -1;
1795 cs->temps[idx].refcount = 1;
1796 temps_used |= (1 << idx);
1797 } else
1798 cs->temps[idx].refcount++;
1799 }
1800 }
1801 cs->temp_in_use = temps_used;
1802 }
1803
1804 static void update_params(struct r300_fragment_program *rp)
1805 {
1806 struct gl_fragment_program *mp = &rp->mesa_program;
1807 int i;
1808
1809 /* Ask Mesa nicely to fill in ParameterValues for us */
1810 if (rp->param_nr)
1811 _mesa_load_state_parameters(rp->ctx, mp->Base.Parameters);
1812
1813 for (i=0;i<rp->param_nr;i++)
1814 COPY_4V(rp->constant[rp->param[i].idx], rp->param[i].values);
1815
1816 rp->params_uptodate = GL_TRUE;
1817 }
1818
1819 void r300_translate_fragment_shader(struct r300_fragment_program *rp)
1820 {
1821 struct r300_pfs_compile_state *cs = NULL;
1822
1823 if (!rp->translated) {
1824
1825 init_program(rp);
1826 cs = rp->cs;
1827
1828 if (parse_program(rp) == GL_FALSE) {
1829 dump_program(rp);
1830 return;
1831 }
1832
1833 /* Finish off */
1834 cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos);
1835 rp->node[rp->cur_node].alu_end =
1836 cs->v_pos - rp->node[rp->cur_node].alu_offset - 1;
1837 if (rp->node[rp->cur_node].tex_end < 0)
1838 rp->node[rp->cur_node].tex_end = 0;
1839 rp->alu_offset = 0;
1840 rp->alu_end = cs->v_pos - 1;
1841 rp->tex_offset = 0;
1842 rp->tex_end = rp->tex.length ? rp->tex.length - 1 : 0;
1843 assert(rp->node[rp->cur_node].alu_end >= 0);
1844 assert(rp->alu_end >= 0);
1845
1846 rp->translated = GL_TRUE;
1847 if (0) dump_program(rp);
1848 }
1849
1850 update_params(rp);
1851 }
1852
1853 /* just some random things... */
1854 static void dump_program(struct r300_fragment_program *rp)
1855 {
1856 int i;
1857 static int pc = 0;
1858
1859 fprintf(stderr, "pc=%d*************************************\n", pc++);
1860
1861 fprintf(stderr, "Mesa program:\n");
1862 fprintf(stderr, "-------------\n");
1863 _mesa_print_program(&rp->mesa_program.Base);
1864 fflush(stdout);
1865
1866 fprintf(stderr, "Hardware program\n");
1867 fprintf(stderr, "----------------\n");
1868
1869 fprintf(stderr, "tex:\n");
1870
1871 for(i=0;i<rp->tex.length;i++) {
1872 fprintf(stderr, "%08x\n", rp->tex.inst[i]);
1873 }
1874
1875 for (i=0;i<(rp->cur_node+1);i++) {
1876 fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "\
1877 "alu_end: %d, tex_end: %d\n", i,
1878 rp->node[i].alu_offset,
1879 rp->node[i].tex_offset,
1880 rp->node[i].alu_end,
1881 rp->node[i].tex_end);
1882 }
1883
1884 fprintf(stderr, "%08x\n",
1885 ((rp->tex_end << 16) | (R300_PFS_TEXI_0 >> 2)));
1886 for (i=0;i<=rp->tex_end;i++)
1887 fprintf(stderr, "%08x\n", rp->tex.inst[i]);
1888
1889 /* dump program in pretty_print_command_stream.tcl-readable format */
1890 fprintf(stderr, "%08x\n",
1891 ((rp->alu_end << 16) | (R300_PFS_INSTR0_0 >> 2)));
1892 for (i=0;i<=rp->alu_end;i++)
1893 fprintf(stderr, "%08x\n", rp->alu.inst[i].inst0);
1894
1895 fprintf(stderr, "%08x\n",
1896 ((rp->alu_end << 16) | (R300_PFS_INSTR1_0 >> 2)));
1897 for (i=0;i<=rp->alu_end;i++)
1898 fprintf(stderr, "%08x\n", rp->alu.inst[i].inst1);
1899
1900 fprintf(stderr, "%08x\n",
1901 ((rp->alu_end << 16) | (R300_PFS_INSTR2_0 >> 2)));
1902 for (i=0;i<=rp->alu_end;i++)
1903 fprintf(stderr, "%08x\n", rp->alu.inst[i].inst2);
1904
1905 fprintf(stderr, "%08x\n",
1906 ((rp->alu_end << 16) | (R300_PFS_INSTR3_0 >> 2)));
1907 for (i=0;i<=rp->alu_end;i++)
1908 fprintf(stderr, "%08x\n", rp->alu.inst[i].inst3);
1909
1910 fprintf(stderr, "00000000\n");
1911 }