r5xx: More trig work.
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
57 /*
58 * Useful macros and values
59 */
/* Print a diagnostic to stderr, prefixed with the current file and function,
 * and mark the program being compiled as failed.
 * Relies on a variable named `fp` being visible at the point of use. */
#define ERROR(fmt, ...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares a local `cs` aliasing the compile state attached to `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* Sizes of the temporary and constant register files; used in this file as
 * the limits checked by get_temp() and emit_const4fv(). */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256
70
71 /* "Register" flags */
/* REG_CONSTANT is ORed into the index returned by emit_const4fv(). */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
/* Selector values for the constant (non-register) swizzle inputs. */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Ready-made RGB swizzles: three 3-bit selectors packed from bit 0 upward. */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/*
 * Field-placement helpers: shift an already-packed swizzle into position
 * inside the named instruction word.  The argument and the whole expansion
 * are parenthesised so that an expression argument such as (a | b) is
 * shifted as a unit and the result composes safely with any operator.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)

/* Writemasks */
/* All four channel bits set. */
#define R500_WRITEMASK_ARGB 0xF
98
99 /* 1/(2pi), needed for quick modulus in trig insts
100 * Thanks to glisse for pointing out how to do it! */
101 static const GLfloat RCP_2PI[] = {0.15915494309189535,
102 0.15915494309189535,
103 0.15915494309189535,
104 0.15915494309189535};
105
106 static void dump_program(struct r500_fragment_program *fp);
107
108 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
109 GLuint swiz = 0x0;
110 GLuint temp;
111 /* This could be optimized, but it should be plenty fast already. */
112 int i;
113 for (i = 0; i < 3; i++) {
114 temp = GET_SWZ(src.Swizzle, i);
115 /* Fix SWIZZLE_ONE */
116 if (temp == 5) temp++;
117 swiz += temp << i*3;
118 }
119 return swiz;
120 }
121
122 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
123 GLuint swiz = GET_SWZ(src.Swizzle, 3);
124
125 if (swiz == 5) swiz++;
126 return swiz;
127 }
128
129 static inline GLuint make_sop_swizzle(struct prog_src_register src) {
130 GLuint swiz = GET_SWZ(src.Swizzle, 0);
131
132 if (swiz == 5) swiz++;
133 return swiz;
134 }
135
136 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
137 GLuint swiz = 0x0;
138 GLuint temp = src.Swizzle;
139 int i;
140 for (i = 0; i < 4; i++) {
141 swiz += (temp & 0x3) << i*2;
142 temp >>= 3;
143 }
144 return swiz;
145 }
146
147 static int get_temp(struct r500_fragment_program *fp, int slot) {
148
149 COMPILE_STATE;
150
151 int r = cs->temp_in_use + 1 + slot;
152
153 if (r > R500_US_NUM_TEMP_REGS) {
154 ERROR("Too many temporary registers requested, can't compile!\n");
155 }
156
157 return r;
158 }
159
160 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
161 static GLuint emit_const4fv(struct r500_fragment_program *fp,
162 const GLfloat * cp)
163 {
164 GLuint reg = 0x0;
165 int index;
166
167 for (index = 0; index < fp->const_nr; ++index) {
168 if (fp->constant[index] == cp)
169 break;
170 }
171
172 if (index >= fp->const_nr) {
173 if (index >= R500_US_NUM_CONST_REGS) {
174 ERROR("Out of hw constants!\n");
175 return reg;
176 }
177
178 fp->const_nr++;
179 fp->constant[index] = cp;
180 }
181
182 reg = index | REG_CONSTANT;
183 return reg;
184 }
185
186 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
187 COMPILE_STATE;
188 GLuint reg;
189 switch (src.File) {
190 case PROGRAM_TEMPORARY:
191 reg = src.Index + fp->temp_reg_offset;
192 break;
193 case PROGRAM_INPUT:
194 reg = cs->inputs[src.Index].reg;
195 break;
196 case PROGRAM_LOCAL_PARAM:
197 reg = emit_const4fv(fp,
198 fp->mesa_program.Base.LocalParams[src.
199 Index]);
200 break;
201 case PROGRAM_ENV_PARAM:
202 reg = emit_const4fv(fp,
203 fp->ctx->FragmentProgram.Parameters[src.
204 Index]);
205 break;
206 case PROGRAM_STATE_VAR:
207 case PROGRAM_NAMED_PARAM:
208 case PROGRAM_CONSTANT:
209 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
210 ParameterValues[src.Index]);
211 break;
212 default:
213 ERROR("Can't handle src.File %x\n", src.File);
214 reg = 0x0;
215 break;
216 }
217 return reg;
218 }
219
220 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
221 GLuint reg;
222 switch (dest.File) {
223 case PROGRAM_TEMPORARY:
224 reg = dest.Index + fp->temp_reg_offset;
225 break;
226 case PROGRAM_OUTPUT:
227 /* Eventually we may need to handle multiple
228 * rendering targets... */
229 reg = dest.Index;
230 break;
231 default:
232 ERROR("Can't handle dest.File %x\n", dest.File);
233 reg = 0x0;
234 break;
235 }
236 return reg;
237 }
238
239 static void emit_tex(struct r500_fragment_program *fp,
240 struct prog_instruction *fpi, int opcode, int dest, int counter)
241 {
242 int hwsrc, hwdest;
243 GLuint mask;
244
245 mask = fpi->DstReg.WriteMask << 11;
246 hwsrc = make_src(fp, fpi->SrcReg[0]);
247
248 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
249 hwdest = get_temp(fp, 0);
250 } else {
251 hwdest = dest;
252 }
253
254 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
255 | R500_INST_TEX_SEM_WAIT;
256
257 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
258 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
259
260 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
261 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
262
263 switch (opcode) {
264 case OPCODE_KIL:
265 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
266 break;
267 case OPCODE_TEX:
268 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
269 break;
270 case OPCODE_TXB:
271 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
272 break;
273 case OPCODE_TXP:
274 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
275 break;
276 default:
277 ERROR("emit_tex can't handle opcode %x\n", opcode);
278 }
279
280 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
281 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
282 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
283 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
284 | R500_TEX_DST_ADDR(hwdest)
285 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
286 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
287
288 fp->inst[counter].inst3 = 0x0;
289 fp->inst[counter].inst4 = 0x0;
290 fp->inst[counter].inst5 = 0x0;
291
292 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
293 counter++;
294 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
295 | R500_INST_TEX_SEM_WAIT | (mask << 4);
296 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
297 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
298 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
299 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
300 | R500_ALU_RGB_SEL_B_SRC0
301 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
302 | R500_ALU_RGB_OMOD_DISABLE;
303 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
304 | R500_ALPHA_ADDRD(dest)
305 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
306 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
307 | R500_ALPHA_OMOD_DISABLE;
308 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
309 | R500_ALU_RGBA_ADDRD(dest)
310 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
311 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
312 }
313 }
314
315 static void dumb_shader(struct r500_fragment_program *fp)
316 {
317 fp->inst[0].inst0 = R500_INST_TYPE_TEX
318 | R500_INST_TEX_SEM_WAIT
319 | R500_INST_RGB_WMASK_R
320 | R500_INST_RGB_WMASK_G
321 | R500_INST_RGB_WMASK_B
322 | R500_INST_ALPHA_WMASK
323 | R500_INST_RGB_CLAMP
324 | R500_INST_ALPHA_CLAMP;
325 fp->inst[0].inst1 = R500_TEX_ID(0)
326 | R500_TEX_INST_LD
327 | R500_TEX_SEM_ACQUIRE
328 | R500_TEX_IGNORE_UNCOVERED;
329 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
330 | R500_TEX_SRC_S_SWIZ_R
331 | R500_TEX_SRC_T_SWIZ_G
332 | R500_TEX_DST_ADDR(0)
333 | R500_TEX_DST_R_SWIZ_R
334 | R500_TEX_DST_G_SWIZ_G
335 | R500_TEX_DST_B_SWIZ_B
336 | R500_TEX_DST_A_SWIZ_A;
337 fp->inst[0].inst3 = R500_DX_ADDR(0)
338 | R500_DX_S_SWIZ_R
339 | R500_DX_T_SWIZ_R
340 | R500_DX_R_SWIZ_R
341 | R500_DX_Q_SWIZ_R
342 | R500_DY_ADDR(0)
343 | R500_DY_S_SWIZ_R
344 | R500_DY_T_SWIZ_R
345 | R500_DY_R_SWIZ_R
346 | R500_DY_Q_SWIZ_R;
347 fp->inst[0].inst4 = 0x0;
348 fp->inst[0].inst5 = 0x0;
349
350 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
351 R500_INST_TEX_SEM_WAIT |
352 R500_INST_LAST |
353 R500_INST_RGB_OMASK_R |
354 R500_INST_RGB_OMASK_G |
355 R500_INST_RGB_OMASK_B |
356 R500_INST_ALPHA_OMASK;
357 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
358 R500_RGB_ADDR1(0) |
359 R500_RGB_ADDR1_CONST |
360 R500_RGB_ADDR2(0) |
361 R500_RGB_ADDR2_CONST |
362 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
363 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
364 R500_ALPHA_ADDR1(0) |
365 R500_ALPHA_ADDR1_CONST |
366 R500_ALPHA_ADDR2(0) |
367 R500_ALPHA_ADDR2_CONST |
368 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
369 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
370 R500_ALU_RGB_R_SWIZ_A_R |
371 R500_ALU_RGB_G_SWIZ_A_G |
372 R500_ALU_RGB_B_SWIZ_A_B |
373 R500_ALU_RGB_SEL_B_SRC0 |
374 R500_ALU_RGB_R_SWIZ_B_1 |
375 R500_ALU_RGB_B_SWIZ_B_1 |
376 R500_ALU_RGB_G_SWIZ_B_1;
377 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
378 R500_ALPHA_SWIZ_A_A |
379 R500_ALPHA_SWIZ_B_1;
380 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
381 R500_ALU_RGBA_R_SWIZ_0 |
382 R500_ALU_RGBA_G_SWIZ_0 |
383 R500_ALU_RGBA_B_SWIZ_0 |
384 R500_ALU_RGBA_A_SWIZ_0;
385
386 fp->cs->nrslots = 2;
387 fp->translated = GL_TRUE;
388 }
389
390 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
391 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
392 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
393 /* output_mask */
394 | (fpi->DstReg.WriteMask << 15);
395 } else {
396 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
397 /* pixel_mask */
398 | (fpi->DstReg.WriteMask << 11);
399 }
400
401 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
402 }
403
404 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
405 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
406 * it is technically more accurate and recommended by ATI/AMD. */
407 GLuint src_reg = make_src(fp, src);
408 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
409 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
410 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
411 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
412 | R500_ALU_RGB_SEL_B_SRC0
413 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
414 | R500_ALU_RGB_OMOD_DISABLE;
415 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
416 | R500_ALPHA_ADDRD(dest)
417 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
418 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
419 | R500_ALPHA_OMOD_DISABLE;
420 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
421 | R500_ALU_RGBA_ADDRD(dest)
422 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
423 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
424 }
425
426 static GLboolean parse_program(struct r500_fragment_program *fp)
427 {
428 struct gl_fragment_program *mp = &fp->mesa_program;
429 const struct prog_instruction *inst = mp->Base.Instructions;
430 struct prog_instruction *fpi;
431 GLuint src[3], dest, temp[2];
432 int flags, pixel_mask = 0, output_mask = 0, counter = 0;
433
434 if (!inst || inst[0].Opcode == OPCODE_END) {
435 ERROR("The program is empty!\n");
436 return GL_FALSE;
437 }
438
439 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
440
441 if (fpi->Opcode != OPCODE_KIL) {
442 dest = make_dest(fp, fpi->DstReg);
443
444 pixel_mask = fpi->DstReg.WriteMask << 11;
445 output_mask = fpi->DstReg.WriteMask << 15;
446 }
447
448 switch (fpi->Opcode) {
449 case OPCODE_ABS:
450 emit_alu(fp, counter, fpi);
451 emit_mov(fp, counter, fpi->SrcReg[0], dest);
452 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
453 | R500_ALU_RGB_MOD_B_ABS;
454 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
455 | R500_ALPHA_MOD_B_ABS;
456 break;
457 case OPCODE_ADD:
458 src[0] = make_src(fp, fpi->SrcReg[0]);
459 src[1] = make_src(fp, fpi->SrcReg[1]);
460 /* Variation on MAD: 1*src0+src1 */
461 emit_alu(fp, counter, fpi);
462 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
463 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
464 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
465 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
466 fp->inst[counter].inst3 = /* 1 */
467 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
468 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
469 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
470 | R500_ALPHA_ADDRD(dest)
471 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
472 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
473 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
474 | R500_ALU_RGBA_ADDRD(dest)
475 | R500_ALU_RGBA_SEL_C_SRC1
476 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
477 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
478 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
479 break;
480 case OPCODE_CMP:
481 /* This inst's selects need to be swapped as follows:
482 * 0 -> C ; 1 -> B ; 2 -> A */
483 src[0] = make_src(fp, fpi->SrcReg[0]);
484 src[1] = make_src(fp, fpi->SrcReg[1]);
485 src[2] = make_src(fp, fpi->SrcReg[2]);
486 emit_alu(fp, counter, fpi);
487 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
488 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
489 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
490 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
491 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
492 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
493 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
494 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
495 | R500_ALPHA_ADDRD(dest)
496 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
497 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
498 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
499 | R500_ALU_RGBA_ADDRD(dest)
500 | R500_ALU_RGBA_SEL_C_SRC2
501 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
502 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
503 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
504 break;
505 case OPCODE_COS:
506 src[0] = make_src(fp, fpi->SrcReg[0]);
507 src[1] = emit_const4fv(fp, RCP_2PI);
508 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
509 | (R500_WRITEMASK_ARGB << 11);
510 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
511 | R500_RGB_ADDR1(src[1]);
512 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
513 | R500_ALPHA_ADDR1(src[1]);
514 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
515 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
516 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
517 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
518 | R500_ALPHA_ADDRD(get_temp(fp, 0))
519 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
520 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
521 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
522 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
523 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
524 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
525 counter++;
526 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
527 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
528 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
529 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
530 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
531 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
532 | R500_ALPHA_ADDRD(get_temp(fp, 1))
533 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
534 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
535 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
536 counter++;
537 emit_alu(fp, counter, fpi);
538 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
539 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
540 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
541 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
542 | R500_ALPHA_ADDRD(dest)
543 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
544 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
545 | R500_ALU_RGBA_ADDRD(dest);
546 break;
547 case OPCODE_DP3:
548 src[0] = make_src(fp, fpi->SrcReg[0]);
549 src[1] = make_src(fp, fpi->SrcReg[1]);
550 emit_alu(fp, counter, fpi);
551 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
552 | R500_RGB_ADDR1(src[1]);
553 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
554 | R500_ALPHA_ADDR1(src[1]);
555 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
556 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
557 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
558 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
559 | R500_ALPHA_ADDRD(dest)
560 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
561 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
562 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
563 | R500_ALU_RGBA_ADDRD(dest);
564 break;
565 case OPCODE_DP4:
566 src[0] = make_src(fp, fpi->SrcReg[0]);
567 src[1] = make_src(fp, fpi->SrcReg[1]);
568 /* Based on DP3 */
569 emit_alu(fp, counter, fpi);
570 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
571 | R500_RGB_ADDR1(src[1]);
572 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
573 | R500_ALPHA_ADDR1(src[1]);
574 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
575 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
576 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
577 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
578 | R500_ALPHA_ADDRD(dest)
579 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
580 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
581 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
582 | R500_ALU_RGBA_ADDRD(dest);
583 break;
584 case OPCODE_DPH:
585 src[0] = make_src(fp, fpi->SrcReg[0]);
586 src[1] = make_src(fp, fpi->SrcReg[1]);
587 /* Based on DP3 */
588 emit_alu(fp, counter, fpi);
589 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
590 | R500_RGB_ADDR1(src[1]);
591 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
592 | R500_ALPHA_ADDR1(src[1]);
593 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
594 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
595 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
596 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
597 | R500_ALPHA_ADDRD(dest)
598 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
599 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
600 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
601 | R500_ALU_RGBA_ADDRD(dest);
602 break;
603 case OPCODE_EX2:
604 src[0] = make_src(fp, fpi->SrcReg[0]);
605 emit_alu(fp, counter, fpi);
606 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
607 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
608 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
609 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
610 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
611 | R500_ALPHA_ADDRD(dest)
612 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
613 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
614 | R500_ALU_RGBA_ADDRD(dest);
615 break;
616 case OPCODE_FRC:
617 src[0] = make_src(fp, fpi->SrcReg[0]);
618 emit_alu(fp, counter, fpi);
619 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
620 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
621 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
622 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
623 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
624 | R500_ALPHA_ADDRD(dest)
625 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
626 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
627 | R500_ALU_RGBA_ADDRD(dest);
628 break;
629 case OPCODE_KIL:
630 emit_tex(fp, fpi, OPCODE_KIL, dest, counter);
631 break;
632 case OPCODE_LG2:
633 src[0] = make_src(fp, fpi->SrcReg[0]);
634 emit_alu(fp, counter, fpi);
635 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
636 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
637 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
638 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
639 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
640 | R500_ALPHA_ADDRD(dest)
641 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
642 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
643 | R500_ALU_RGBA_ADDRD(dest);
644 break;
645 case OPCODE_LRP:
646 /* src0 * src1 + INV(src0) * src2
647 * 1) MUL src0, src1, temp
648 * 2) PRE 1-src0; MAD srcp, src2, temp */
649 src[0] = make_src(fp, fpi->SrcReg[0]);
650 src[1] = make_src(fp, fpi->SrcReg[1]);
651 src[2] = make_src(fp, fpi->SrcReg[2]);
652 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
653 | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11);
654 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
655 | R500_RGB_ADDR1(src[1]);
656 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
657 | R500_ALPHA_ADDR1(src[1]);
658 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
659 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
660 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
661 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
662 | R500_ALPHA_ADDRD(get_temp(fp, 0))
663 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
664 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
665 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
666 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
667 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
668 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
669 counter++;
670 emit_alu(fp, counter, fpi);
671 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
672 | R500_RGB_ADDR1(src[2])
673 | R500_RGB_ADDR2(get_temp(fp, 0))
674 | R500_RGB_SRCP_OP_1_MINUS_RGB0;
675 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
676 | R500_ALPHA_ADDR1(src[2])
677 | R500_ALPHA_ADDR2(get_temp(fp, 0))
678 | R500_ALPHA_SRCP_OP_1_MINUS_A0;
679 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP
680 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
681 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
682 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
683 | R500_ALPHA_ADDRD(dest)
684 | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
685 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
686 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
687 | R500_ALU_RGBA_ADDRD(dest)
688 | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
689 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
690 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
691 break;
692 case OPCODE_MAD:
693 src[0] = make_src(fp, fpi->SrcReg[0]);
694 src[1] = make_src(fp, fpi->SrcReg[1]);
695 src[2] = make_src(fp, fpi->SrcReg[2]);
696 emit_alu(fp, counter, fpi);
697 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
698 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
699 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
700 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
701 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
702 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
703 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
704 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
705 | R500_ALPHA_ADDRD(dest)
706 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
707 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
708 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
709 | R500_ALU_RGBA_ADDRD(dest)
710 | R500_ALU_RGBA_SEL_C_SRC2
711 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
712 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
713 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
714 break;
715 case OPCODE_MAX:
716 src[0] = make_src(fp, fpi->SrcReg[0]);
717 src[1] = make_src(fp, fpi->SrcReg[1]);
718 emit_alu(fp, counter, fpi);
719 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
720 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
721 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
722 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
723 | R500_ALU_RGB_SEL_B_SRC1
724 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
725 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
726 | R500_ALPHA_ADDRD(dest)
727 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
728 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
729 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
730 | R500_ALU_RGBA_ADDRD(dest);
731 break;
732 case OPCODE_MIN:
733 src[0] = make_src(fp, fpi->SrcReg[0]);
734 src[1] = make_src(fp, fpi->SrcReg[1]);
735 emit_alu(fp, counter, fpi);
736 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
737 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
738 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
739 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
740 | R500_ALU_RGB_SEL_B_SRC1
741 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
742 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
743 | R500_ALPHA_ADDRD(dest)
744 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
745 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
746 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
747 | R500_ALU_RGBA_ADDRD(dest);
748 break;
749 case OPCODE_MOV:
750 emit_alu(fp, counter, fpi);
751 emit_mov(fp, counter, fpi->SrcReg[0], dest);
752 break;
753 case OPCODE_MUL:
754 src[0] = make_src(fp, fpi->SrcReg[0]);
755 src[1] = make_src(fp, fpi->SrcReg[1]);
756 /* Variation on MAD: src0*src1+0 */
757 emit_alu(fp, counter, fpi);
758 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
759 | R500_RGB_ADDR1(src[1]);
760 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
761 | R500_ALPHA_ADDR1(src[1]);
762 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
763 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
764 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
765 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
766 | R500_ALPHA_ADDRD(dest)
767 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
768 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
769 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
770 | R500_ALU_RGBA_ADDRD(dest)
771 // | R500_ALU_RGBA_SEL_C_SRC2
772 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
773 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
774 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
775 break;
776 case OPCODE_POW:
777 /* POW(a,b) = EX2(LN2(a)*b) */
778 src[0] = make_src(fp, fpi->SrcReg[0]);
779 src[1] = make_src(fp, fpi->SrcReg[1]);
780 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
781 | (R500_WRITEMASK_ARGB << 11);
782 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
783 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
784 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
785 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
786 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
787 | R500_ALPHA_ADDRD(get_temp(fp, 0))
788 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
789 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
790 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
791 counter++;
792 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
793 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0))
794 | R500_RGB_ADDR1(src[1]);
795 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0))
796 | R500_ALPHA_ADDR1(src[1]);
797 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
798 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
799 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
800 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
801 | R500_ALPHA_ADDRD(get_temp(fp, 1))
802 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
803 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
804 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
805 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
806 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
807 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
808 counter++;
809 emit_alu(fp, counter, fpi);
810 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
811 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
812 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
813 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
814 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
815 | R500_ALPHA_ADDRD(dest)
816 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
817 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
818 | R500_ALU_RGBA_ADDRD(dest);
819 break;
820 case OPCODE_RCP:
821 src[0] = make_src(fp, fpi->SrcReg[0]);
822 emit_alu(fp, counter, fpi);
823 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
824 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
825 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
826 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
827 fp->inst[counter].inst4 = R500_ALPHA_OP_RCP
828 | R500_ALPHA_ADDRD(dest)
829 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
830 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
831 | R500_ALU_RGBA_ADDRD(dest);
832 break;
833 case OPCODE_RSQ:
834 src[0] = make_src(fp, fpi->SrcReg[0]);
835 emit_alu(fp, counter, fpi);
836 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
837 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
838 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
839 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
840 fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ
841 | R500_ALPHA_ADDRD(dest)
842 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
843 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
844 | R500_ALU_RGBA_ADDRD(dest);
845 break;
846 case OPCODE_SCS:
847 src[0] = make_src(fp, fpi->SrcReg[0]);
848 src[1] = emit_const4fv(fp, RCP_2PI);
849 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
850 | (R500_WRITEMASK_ARGB << 11);
851 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
852 | R500_RGB_ADDR1(src[1]);
853 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
854 | R500_ALPHA_ADDR1(src[1]);
855 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
856 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
857 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
858 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
859 | R500_ALPHA_ADDRD(get_temp(fp, 0))
860 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
861 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
862 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
863 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
864 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
865 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
866 counter++;
867 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
868 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
869 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
870 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
871 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
872 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
873 | R500_ALPHA_ADDRD(get_temp(fp, 1))
874 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
875 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
876 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
877 counter++;
878 /* Do a cosine, then a sine, masking out the channels we want to protect. */
879 /* Cosine only goes in R (x) channel. */
880 fpi->DstReg.WriteMask = 0x1;
881 emit_alu(fp, counter, fpi);
882 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
883 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
884 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
885 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
886 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
887 | R500_ALPHA_ADDRD(dest)
888 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
889 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
890 | R500_ALU_RGBA_ADDRD(dest);
891 counter++;
892 /* Sine only goes in G (y) channel. */
893 fpi->DstReg.WriteMask = 0x2;
894 emit_alu(fp, counter, fpi);
895 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
896 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
897 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
898 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
899 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
900 | R500_ALPHA_ADDRD(dest)
901 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
902 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
903 | R500_ALU_RGBA_ADDRD(dest);
904 break;
905 case OPCODE_SGE:
906 /* We use SRCP, so as a precaution we're
907 * going to set NOP in previous inst, if possible. */
908 /* This inst's selects need to be swapped as follows:
909 * 0 -> C ; 1 -> B ; 2 -> A */
910 src[0] = make_src(fp, fpi->SrcReg[0]);
911 src[1] = make_src(fp, fpi->SrcReg[1]);
912 emit_alu(fp, counter, fpi);
913 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
914 | R500_RGB_ADDR1(src[1])
915 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
916 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
917 | R500_ALPHA_ADDR1(src[1])
918 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
919 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
920 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
921 | R500_ALU_RGB_SEL_B_SRC1
922 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
923 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
924 | R500_ALPHA_ADDRD(dest)
925 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
926 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
927 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
928 | R500_ALU_RGBA_ADDRD(dest)
929 | R500_ALU_RGBA_SEL_C_SRCP
930 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
931 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
932 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
933 break;
934 case OPCODE_SIN:
935 src[0] = make_src(fp, fpi->SrcReg[0]);
936 src[1] = emit_const4fv(fp, RCP_2PI);
937 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
938 | (R500_WRITEMASK_ARGB << 11);
939 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
940 | R500_RGB_ADDR1(src[1]);
941 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
942 | R500_ALPHA_ADDR1(src[1]);
943 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
944 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
945 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
946 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
947 | R500_ALPHA_ADDRD(get_temp(fp, 0))
948 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
949 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
950 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
951 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
952 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
953 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
954 counter++;
955 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
956 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
957 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
958 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
959 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
960 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
961 | R500_ALPHA_ADDRD(get_temp(fp, 1))
962 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
963 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
964 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
965 counter++;
966 emit_alu(fp, counter, fpi);
967 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
968 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
969 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
970 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
971 | R500_ALPHA_ADDRD(dest)
972 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
973 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
974 | R500_ALU_RGBA_ADDRD(dest);
975 break;
976 case OPCODE_SLT:
977 /* We use SRCP, so as a precaution we're
978 * going to set NOP in previous inst, if possible. */
979 /* This inst's selects need to be swapped as follows:
980 * 0 -> C ; 1 -> B ; 2 -> A */
981 src[0] = make_src(fp, fpi->SrcReg[0]);
982 src[1] = make_src(fp, fpi->SrcReg[1]);
983 emit_alu(fp, counter, fpi);
984 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
985 | R500_RGB_ADDR1(src[1])
986 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
987 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
988 | R500_ALPHA_ADDR1(src[1])
989 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
990 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
991 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
992 | R500_ALU_RGB_SEL_B_SRC1
993 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
994 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
995 | R500_ALPHA_ADDRD(dest)
996 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
997 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
998 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
999 | R500_ALU_RGBA_ADDRD(dest)
1000 | R500_ALU_RGBA_SEL_C_SRCP
1001 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
1002 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
1003 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
1004 break;
1005 case OPCODE_SUB:
1006 src[0] = make_src(fp, fpi->SrcReg[0]);
1007 src[1] = make_src(fp, fpi->SrcReg[1]);
1008 /* Variation on MAD: 1*src0-src1 */
1009 emit_alu(fp, counter, fpi);
1010 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1011 | R500_RGB_ADDR2(src[1]);
1012 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1013 | R500_ALPHA_ADDR2(src[1]);
1014 fp->inst[counter].inst3 = /* 1 */
1015 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1016 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1017 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1018 | R500_ALPHA_ADDRD(dest)
1019 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1020 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1021 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1022 | R500_ALU_RGBA_ADDRD(dest)
1023 | R500_ALU_RGBA_SEL_C_SRC2
1024 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1025 | R500_ALU_RGBA_MOD_C_NEG
1026 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1027 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1028 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1029 break;
1030 case OPCODE_SWZ:
1031 /* TODO: Negation masks! */
1032 emit_alu(fp, counter, fpi);
1033 emit_mov(fp, counter, fpi->SrcReg[0], dest);
1034 break;
1035 case OPCODE_TEX:
1036 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
1037 if (fpi->DstReg.File == PROGRAM_OUTPUT)
1038 counter++;
1039 break;
1040 case OPCODE_TXB:
1041 emit_tex(fp, fpi, OPCODE_TXB, dest, counter);
1042 if (fpi->DstReg.File == PROGRAM_OUTPUT)
1043 counter++;
1044 break;
1045 case OPCODE_TXP:
1046 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
1047 if (fpi->DstReg.File == PROGRAM_OUTPUT)
1048 counter++;
1049 break;
1050 default:
1051 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
1052 break;
1053 }
1054
1055 /* Finishing touches */
1056 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
1057 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
1058 }
1059
1060 counter++;
1061
1062 if (fp->error)
1063 return GL_FALSE;
1064
1065 }
1066
1067 /* Finish him! (If it's an ALU/OUT instruction...) */
1068 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
1069 fp->inst[counter-1].inst0 |= R500_INST_LAST;
1070 } else {
1071 /* We still need to put an output inst, right? */
1072 WARN_ONCE("Final FP instruction is not an OUT.\n");
1073 #if 0
1074
1075 #endif
1076 }
1077
1078 fp->cs->nrslots = counter;
1079
1080 fp->max_temp_idx++;
1081
1082 return GL_TRUE;
1083 }
1084
1085 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
1086 {
1087 struct r300_pfs_compile_state *cs = NULL;
1088 struct gl_fragment_program *mp = &fp->mesa_program;
1089 struct prog_instruction *fpi;
1090 GLuint InputsRead = mp->Base.InputsRead;
1091 GLuint temps_used = 0;
1092 int i, j;
1093
1094 /* New compile, reset tracking data */
1095 fp->optimization =
1096 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
1097 fp->translated = GL_FALSE;
1098 fp->error = GL_FALSE;
1099 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
1100 fp->cur_node = 0;
1101 fp->first_node_has_tex = 0;
1102 fp->const_nr = 0;
1103 /* Size of pixel stack, plus 1. */
1104 fp->max_temp_idx = 1;
1105 /* Temp register offset. */
1106 fp->temp_reg_offset = 0;
1107 fp->node[0].alu_end = -1;
1108 fp->node[0].tex_end = -1;
1109
1110 _mesa_memset(cs, 0, sizeof(*fp->cs));
1111 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
1112 for (j = 0; j < 3; j++) {
1113 cs->slot[i].vsrc[j] = SRC_CONST;
1114 cs->slot[i].ssrc[j] = SRC_CONST;
1115 }
1116 }
1117
1118 /* Work out what temps the Mesa inputs correspond to, this must match
1119 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1120 * configures itself based on the fragprog's InputsRead
1121 *
1122 * NOTE: this depends on get_hw_temp() allocating registers in order,
1123 * starting from register 0, so we're just going to do that instead.
1124 */
1125
1126 /* Texcoords come first */
1127 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
1128 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1129 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
1130 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
1131 fp->temp_reg_offset;
1132 fp->temp_reg_offset++;
1133 }
1134 }
1135 InputsRead &= ~FRAG_BITS_TEX_ANY;
1136
1137 /* fragment position treated as a texcoord */
1138 if (InputsRead & FRAG_BIT_WPOS) {
1139 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
1140 cs->inputs[FRAG_ATTRIB_WPOS].reg =
1141 fp->temp_reg_offset;
1142 fp->temp_reg_offset++;
1143 }
1144 InputsRead &= ~FRAG_BIT_WPOS;
1145
1146 /* Then primary colour */
1147 if (InputsRead & FRAG_BIT_COL0) {
1148 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
1149 cs->inputs[FRAG_ATTRIB_COL0].reg =
1150 fp->temp_reg_offset;
1151 fp->temp_reg_offset++;
1152 }
1153 InputsRead &= ~FRAG_BIT_COL0;
1154
1155 /* Secondary color */
1156 if (InputsRead & FRAG_BIT_COL1) {
1157 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
1158 cs->inputs[FRAG_ATTRIB_COL1].reg =
1159 fp->temp_reg_offset;
1160 fp->temp_reg_offset++;
1161 }
1162 InputsRead &= ~FRAG_BIT_COL1;
1163
1164 /* Anything else */
1165 if (InputsRead) {
1166 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
1167 /* force read from hwreg 0 for now */
1168 for (i = 0; i < 32; i++)
1169 if (InputsRead & (1 << i))
1170 cs->inputs[i].reg = 0;
1171 }
1172
1173 if (!mp->Base.Instructions) {
1174 ERROR("No instructions found in program, going to go die now.\n");
1175 return;
1176 }
1177
1178 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
1179 for (i = 0; i < 3; i++) {
1180 if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
1181 if (fpi->SrcReg[i].Index > temps_used)
1182 temps_used = fpi->SrcReg[i].Index;
1183 }
1184 }
1185 }
1186
1187 cs->temp_in_use = temps_used;
1188
1189 fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use + 1;
1190 }
1191
1192 static void update_params(struct r500_fragment_program *fp)
1193 {
1194 struct gl_fragment_program *mp = &fp->mesa_program;
1195
1196 /* Ask Mesa nicely to fill in ParameterValues for us */
1197 if (mp->Base.Parameters)
1198 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1199 }
1200
1201 void r500TranslateFragmentShader(r300ContextPtr r300,
1202 struct r500_fragment_program *fp)
1203 {
1204
1205 struct r300_pfs_compile_state *cs = NULL;
1206
1207 if (!fp->translated) {
1208
1209
1210
1211 init_program(r300, fp);
1212 cs = fp->cs;
1213
1214 if (parse_program(fp) == GL_FALSE) {
1215 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1216 dumb_shader(fp);
1217 fp->inst_offset = 0;
1218 fp->inst_end = cs->nrslots - 1;
1219 return;
1220 }
1221 fp->inst_offset = 0;
1222 fp->inst_end = cs->nrslots - 1;
1223
1224 fp->translated = GL_TRUE;
1225 if (1 || RADEON_DEBUG & DEBUG_PIXEL) {
1226 fprintf(stderr, "Mesa program:\n");
1227 fprintf(stderr, "-------------\n");
1228 _mesa_print_program(&fp->mesa_program.Base);
1229 fflush(stdout);
1230 dump_program(fp);
1231 }
1232
1233
1234 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1235 }
1236
1237 update_params(fp);
1238
1239 }
1240
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \param swiz_val selector value, 0..7
 * \return the name string, or NULL when \p swiz_val is outside 0..7
 */
static char *toswiz(int swiz_val) {
	static char *const swizzle_names[] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};
	const int count = (int)(sizeof(swizzle_names) / sizeof(swizzle_names[0]));

	if (swiz_val < 0 || swiz_val >= count)
		return NULL;

	return swizzle_names[swiz_val];
}
1254
/**
 * Map a 4-bit RGBA opcode field to its printable mnemonic.
 *
 * Only values 0..12 have a mnemonic in this table. Any other value yields
 * the placeholder "UNKNOWN", so the result is always a valid string that
 * can be handed to a "%s" conversion.
 *
 * \param op_val opcode field value
 * \return mnemonic string; never NULL
 */
static char *toop(int op_val)
{
	/* Start from the placeholder so unmapped opcodes never return an
	 * uninitialised pointer. */
	char *str = "UNKNOWN";

	switch (op_val) {
	case 0: str = "MAD"; break;
	case 1: str = "DP3"; break;
	case 2: str = "DP4"; break;
	case 3: str = "D2A"; break;
	case 4: str = "MIN"; break;
	case 5: str = "MAX"; break;
	case 6: str = "Reserved"; break;
	case 7: str = "CND"; break;
	case 8: str = "CMP"; break;
	case 9: str = "FRC"; break;
	case 10: str = "SOP"; break;
	case 11: str = "MDH"; break;
	case 12: str = "MDV"; break;
	default: break;
	}
	return str;
}
1275
/**
 * Map a 4-bit alpha opcode field to its printable mnemonic.
 *
 * \param op_val opcode field value, 0..15
 * \return mnemonic string, or NULL when \p op_val is outside 0..15
 */
static char *to_alpha_op(int op_val)
{
	static char *const alpha_op_names[] = {
		"MAD", "DP", "MIN", "MAX",
		"Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ",
		"SIN", "COS", "MDH", "MDV"
	};
	const int count = (int)(sizeof(alpha_op_names) / sizeof(alpha_op_names[0]));

	if (op_val < 0 || op_val >= count)
		return NULL;

	return alpha_op_names[op_val];
}
1299
/**
 * Map a 4-bit write mask to a printable channel list.
 *
 * Bit 0 corresponds to R, bit 1 to G, bit 2 to B and bit 3 to A; the alpha
 * channel, when present, is printed first.
 *
 * \param val mask value, 0..15
 * \return channel string ("NONE" for 0), or NULL when \p val is outside 0..15
 */
static char *to_mask(int val)
{
	static char *const mask_names[] = {
		"NONE", "R", "G", "RG",
		"B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG",
		"AB", "ARB", "AGB", "ARGB"
	};
	const int count = (int)(sizeof(mask_names) / sizeof(mask_names[0]));

	if (val < 0 || val >= count)
		return NULL;

	return mask_names[val];
}
1323
/**
 * Map a 3-bit texture opcode field to its printable name.
 *
 * Values 0..6 have a name in this table. Any other value yields the
 * placeholder "UNKNOWN" rather than NULL, so the result can always be
 * handed to a "%s" conversion.
 *
 * \param val texture opcode field value
 * \return name string; never NULL
 */
static char *to_texop(int val)
{
	switch(val) {
	case 0: return "NOP";
	case 1: return "LD";
	case 2: return "TEXKILL";
	case 3: return "PROJ";
	case 4: return "LODBIAS";
	case 5: return "LOD";
	case 6: return "DXDY";
	default: break;
	}
	/* Unmapped opcode: give the caller something printable. */
	return "UNKNOWN";
}
1337
1338 static void dump_program(struct r500_fragment_program *fp)
1339 {
1340 int pc = 0;
1341 int n;
1342 uint32_t inst;
1343 uint32_t inst0;
1344 char *str = NULL;
1345
1346 for (n = 0; n < fp->inst_end+1; n++) {
1347 inst0 = inst = fp->inst[n].inst0;
1348 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1349 switch(inst & 0x3) {
1350 case R500_INST_TYPE_ALU: str = "ALU"; break;
1351 case R500_INST_TYPE_OUT: str = "OUT"; break;
1352 case R500_INST_TYPE_FC: str = "FC"; break;
1353 case R500_INST_TYPE_TEX: str = "TEX"; break;
1354 };
1355 fprintf(stderr,"%s %s %s %s %s ", str,
1356 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1357 inst & R500_INST_LAST ? "LAST" : "",
1358 inst & R500_INST_NOP ? "NOP" : "",
1359 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1360 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1361 to_mask((inst >> 15) & 0xf));
1362
1363 switch(inst0 & 0x3) {
1364 case 0:
1365 case 1:
1366 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1367 inst = fp->inst[n].inst1;
1368
1369 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1370 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1371 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1372 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1373 (inst >> 30));
1374
1375 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1376 inst = fp->inst[n].inst2;
1377 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1378 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1379 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1380 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1381 (inst >> 30));
1382 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1383 inst = fp->inst[n].inst3;
1384 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1385 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1386 (inst >> 11) & 0x3,
1387 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1388 (inst >> 24) & 0x3);
1389
1390
1391 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1392 inst = fp->inst[n].inst4;
1393 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d\n", to_alpha_op(inst & 0xf),
1394 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1395 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1396 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3);
1397
1398 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1399 inst = fp->inst[n].inst5;
1400 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1401 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1402 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1403 (inst >> 23) & 0x3,
1404 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1405 break;
1406 case 2:
1407 break;
1408 case 3:
1409 inst = fp->inst[n].inst1;
1410 fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
1411 to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
1412 (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
1413 inst = fp->inst[n].inst2;
1414 fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
1415 inst & 127, inst & (1<<7) ? "(rel)" : "",
1416 toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
1417 toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
1418 (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
1419 toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
1420 toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
1421
1422 fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3);
1423 break;
1424 }
1425 fprintf(stderr,"\n");
1426 }
1427
1428 }