832763c554f757abedfc01622448a3f0ba0871bb
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
/*
 * Useful macros and values
 */

/* Print a diagnostic prefixed with file and function name, then flag the
 * program as failed. Expects a variable named `fp` (with an `error` member)
 * to be in scope at the point of use. A newline is appended to `fmt`. */
#define ERROR(fmt, args...) do {			\
		fprintf(stderr, "%s::%s(): " fmt "\n",	\
			__FILE__, __FUNCTION__, ##args);	\
		fp->error = GL_TRUE;			\
	} while(0)

/* Declares a local `cs` pointing at the compile state hanging off `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
#define R500_SWIZ_MOD_NEG 1
#define R500_SWIZ_MOD_ABS 2
#define R500_SWIZ_MOD_NEG_ABS 3

/* The MAKE_SWIZ_* helpers move an unshifted swizzle value into its field.
 * The argument is parenthesized so that compound expressions such as
 * `a | b` are shifted as a whole. */

/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)

/* Writemasks */
#define R500_WRITEMASK_ARGB 0xF
101
102 /* 1/(2pi), needed for quick modulus in trig insts
103 * Thanks to glisse for pointing out how to do it! */
104 static const GLfloat RCP_2PI[] = {0.15915494309189535,
105 0.15915494309189535,
106 0.15915494309189535,
107 0.15915494309189535};
108
109 static void dump_program(struct r500_fragment_program *fp);
110
111 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
112 GLuint swiz = 0x0;
113 GLuint temp;
114 /* This could be optimized, but it should be plenty fast already. */
115 int i;
116 for (i = 0; i < 3; i++) {
117 temp = GET_SWZ(src.Swizzle, i);
118 /* Fix SWIZZLE_ONE */
119 if (temp == 5) temp++;
120 swiz |= temp << i*3;
121 }
122 if (src.NegateBase)
123 swiz |= (R500_SWIZ_MOD_NEG << 9);
124 return swiz;
125 }
126
127 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
128 GLuint swiz = GET_SWZ(src.Swizzle, 3);
129
130 if (swiz == 5) swiz++;
131
132 if (src.NegateBase)
133 swiz |= (R500_SWIZ_MOD_NEG << 3);
134
135 return swiz;
136 }
137
138 static inline GLuint make_sop_swizzle(struct prog_src_register src) {
139 GLuint swiz = GET_SWZ(src.Swizzle, 0);
140
141 if (swiz == 5) swiz++;
142 return swiz;
143 }
144
145 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
146 GLuint swiz = 0x0;
147 GLuint temp = src.Swizzle;
148 int i;
149 for (i = 0; i < 4; i++) {
150 swiz += (temp & 0x3) << i*2;
151 temp >>= 3;
152 }
153 return swiz;
154 }
155
156 static int get_temp(struct r500_fragment_program *fp, int slot) {
157
158 COMPILE_STATE;
159
160 int r = cs->temp_in_use + 1 + slot;
161
162 if (r > R500_US_NUM_TEMP_REGS) {
163 ERROR("Too many temporary registers requested, can't compile!\n");
164 }
165
166 return r;
167 }
168
169 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
170 static GLuint emit_const4fv(struct r500_fragment_program *fp,
171 const GLfloat * cp)
172 {
173 GLuint reg = 0x0;
174 int index;
175
176 for (index = 0; index < fp->const_nr; ++index) {
177 if (fp->constant[index] == cp)
178 break;
179 }
180
181 if (index >= fp->const_nr) {
182 if (index >= R500_US_NUM_CONST_REGS) {
183 ERROR("Out of hw constants!\n");
184 return reg;
185 }
186
187 fp->const_nr++;
188 fp->constant[index] = cp;
189 }
190
191 reg = index | REG_CONSTANT;
192 return reg;
193 }
194
195 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
196 COMPILE_STATE;
197 GLuint reg;
198 switch (src.File) {
199 case PROGRAM_TEMPORARY:
200 reg = src.Index + fp->temp_reg_offset;
201 break;
202 case PROGRAM_INPUT:
203 reg = cs->inputs[src.Index].reg;
204 break;
205 case PROGRAM_LOCAL_PARAM:
206 reg = emit_const4fv(fp,
207 fp->mesa_program.Base.LocalParams[src.
208 Index]);
209 break;
210 case PROGRAM_ENV_PARAM:
211 reg = emit_const4fv(fp,
212 fp->ctx->FragmentProgram.Parameters[src.
213 Index]);
214 break;
215 case PROGRAM_STATE_VAR:
216 case PROGRAM_NAMED_PARAM:
217 case PROGRAM_CONSTANT:
218 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
219 ParameterValues[src.Index]);
220 break;
221 default:
222 ERROR("Can't handle src.File %x\n", src.File);
223 reg = 0x0;
224 break;
225 }
226 return reg;
227 }
228
229 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
230 GLuint reg;
231 switch (dest.File) {
232 case PROGRAM_TEMPORARY:
233 reg = dest.Index + fp->temp_reg_offset;
234 break;
235 case PROGRAM_OUTPUT:
236 /* Eventually we may need to handle multiple
237 * rendering targets... */
238 reg = dest.Index;
239 break;
240 default:
241 ERROR("Can't handle dest.File %x\n", dest.File);
242 reg = 0x0;
243 break;
244 }
245 return reg;
246 }
247
248 static void emit_tex(struct r500_fragment_program *fp,
249 struct prog_instruction *fpi, int dest, int counter)
250 {
251 int hwsrc, hwdest;
252 GLuint mask;
253
254 mask = fpi->DstReg.WriteMask << 11;
255 hwsrc = make_src(fp, fpi->SrcReg[0]);
256
257 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
258 hwdest = get_temp(fp, 0);
259 } else {
260 hwdest = dest;
261 }
262
263 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
264 | R500_INST_TEX_SEM_WAIT;
265
266 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
267 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
268
269 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
270 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
271
272 switch (fpi->Opcode) {
273 case OPCODE_KIL:
274 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
275 break;
276 case OPCODE_TEX:
277 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
278 break;
279 case OPCODE_TXB:
280 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
281 break;
282 case OPCODE_TXP:
283 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
284 break;
285 default:
286 ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode);
287 }
288
289 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
290 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
291 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
292 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
293 | R500_TEX_DST_ADDR(hwdest)
294 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
295 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
296
297 fp->inst[counter].inst3 = 0x0;
298 fp->inst[counter].inst4 = 0x0;
299 fp->inst[counter].inst5 = 0x0;
300
301 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
302 counter++;
303 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
304 | R500_INST_TEX_SEM_WAIT | (mask << 4);
305 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
306 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
307 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
308 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
309 | R500_ALU_RGB_SEL_B_SRC0
310 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
311 | R500_ALU_RGB_OMOD_DISABLE;
312 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
313 | R500_ALPHA_ADDRD(dest)
314 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
315 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
316 | R500_ALPHA_OMOD_DISABLE;
317 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
318 | R500_ALU_RGBA_ADDRD(dest)
319 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
320 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
321 }
322 }
323
324 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
325 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
326 fp->inst[counter].inst0 = R500_INST_TYPE_OUT;
327
328 if (fpi->DstReg.Index == FRAG_RESULT_COLR)
329 fp->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15);
330
331 if (fpi->DstReg.Index == FRAG_RESULT_DEPR)
332 fp->inst[counter].inst4 = R500_ALPHA_W_OMASK;
333 } else {
334 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
335 /* pixel_mask */
336 | (fpi->DstReg.WriteMask << 11);
337 }
338
339 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
340
341 /* Ideally, we shouldn't have to explicitly clear memory here! */
342 fp->inst[counter].inst1 = 0x0;
343 fp->inst[counter].inst2 = 0x0;
344 fp->inst[counter].inst3 = 0x0;
345 fp->inst[counter].inst5 = 0x0;
346 }
347
348 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
349 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
350 * it is technically more accurate and recommended by ATI/AMD. */
351 GLuint src_reg = make_src(fp, src);
352 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
353 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
354 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
355 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
356 | R500_ALU_RGB_SEL_B_SRC0
357 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
358 | R500_ALU_RGB_OMOD_DISABLE;
359 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
360 | R500_ALPHA_ADDRD(dest)
361 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
362 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
363 | R500_ALPHA_OMOD_DISABLE;
364 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
365 | R500_ALU_RGBA_ADDRD(dest)
366 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
367 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
368 }
369
370 static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int one, int two, int three) {
371 /* Note: This code was all Corbin's. Corbin is a rather hackish coder.
372 * If you can make it pretty or fast, please do so! */
373 emit_alu(fp, counter, fpi);
374 /* Common MAD stuff */
375 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
376 | R500_ALPHA_ADDRD(make_dest(fp, fpi->DstReg));
377 fp->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD
378 | R500_ALU_RGBA_ADDRD(make_dest(fp, fpi->DstReg));
379 switch (one) {
380 case 0:
381 case 1:
382 case 2:
383 fp->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(fp, fpi->SrcReg[one]));
384 fp->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(fp, fpi->SrcReg[one]));
385 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0
386 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one]));
387 fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0
388 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one]));
389 break;
390 case R500_SWIZZLE_ZERO:
391 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO);
392 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO);
393 break;
394 case R500_SWIZZLE_ONE:
395 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE);
396 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE);
397 break;
398 default:
399 ERROR("Bad src index in emit_mad: %d\n", one);
400 break;
401 }
402 switch (two) {
403 case 0:
404 case 1:
405 case 2:
406 fp->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(fp, fpi->SrcReg[two]));
407 fp->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(fp, fpi->SrcReg[two]));
408 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
409 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two]));
410 fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1
411 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two]));
412 break;
413 case R500_SWIZZLE_ZERO:
414 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
415 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
416 break;
417 case R500_SWIZZLE_ONE:
418 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
419 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
420 break;
421 default:
422 ERROR("Bad src index in emit_mad: %d\n", two);
423 break;
424 }
425 switch (three) {
426 case 0:
427 case 1:
428 case 2:
429 fp->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(fp, fpi->SrcReg[three]));
430 fp->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(fp, fpi->SrcReg[three]));
431 fp->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2
432 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three]))
433 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
434 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three]));
435 break;
436 case R500_SWIZZLE_ZERO:
437 fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
438 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
439 break;
440 case R500_SWIZZLE_ONE:
441 fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE)
442 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE);
443 break;
444 default:
445 ERROR("Bad src index in emit_mad: %d\n", three);
446 break;
447 }
448 }
449
450 static GLboolean parse_program(struct r500_fragment_program *fp)
451 {
452 struct gl_fragment_program *mp = &fp->mesa_program;
453 const struct prog_instruction *inst = mp->Base.Instructions;
454 struct prog_instruction *fpi;
455 GLuint src[3], dest = 0;
456 int temp_swiz, counter = 0;
457
458 if (!inst || inst[0].Opcode == OPCODE_END) {
459 ERROR("The program is empty!\n");
460 return GL_FALSE;
461 }
462
463 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
464
465 if (fpi->Opcode != OPCODE_KIL) {
466 dest = make_dest(fp, fpi->DstReg);
467 }
468
469 switch (fpi->Opcode) {
470 case OPCODE_ABS:
471 emit_alu(fp, counter, fpi);
472 emit_mov(fp, counter, fpi->SrcReg[0], dest);
473 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
474 | R500_ALU_RGB_MOD_B_ABS;
475 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
476 | R500_ALPHA_MOD_B_ABS;
477 break;
478 case OPCODE_ADD:
479 /* Variation on MAD: 1*src0+src1 */
480 emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
481 break;
482 case OPCODE_CMP:
483 /* This inst's selects need to be swapped as follows:
484 * 0 -> C ; 1 -> B ; 2 -> A */
485 src[0] = make_src(fp, fpi->SrcReg[0]);
486 src[1] = make_src(fp, fpi->SrcReg[1]);
487 src[2] = make_src(fp, fpi->SrcReg[2]);
488 emit_alu(fp, counter, fpi);
489 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
490 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
491 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
492 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
493 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
494 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
495 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
496 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
497 | R500_ALPHA_ADDRD(dest)
498 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
499 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
500 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
501 | R500_ALU_RGBA_ADDRD(dest)
502 | R500_ALU_RGBA_SEL_C_SRC2
503 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
504 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
505 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
506 break;
507 case OPCODE_COS:
508 src[0] = make_src(fp, fpi->SrcReg[0]);
509 src[1] = emit_const4fv(fp, RCP_2PI);
510 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
511 | (R500_WRITEMASK_ARGB << 11);
512 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
513 | R500_RGB_ADDR1(src[1]);
514 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
515 | R500_ALPHA_ADDR1(src[1]);
516 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
517 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
518 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
519 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
520 | R500_ALPHA_ADDRD(get_temp(fp, 0))
521 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
522 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
523 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
524 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
525 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
526 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
527 counter++;
528 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
529 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
530 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
531 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
532 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
533 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
534 | R500_ALPHA_ADDRD(get_temp(fp, 1))
535 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
536 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
537 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
538 counter++;
539 emit_alu(fp, counter, fpi);
540 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
541 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
542 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
543 fp->inst[counter].inst4 |= R500_ALPHA_OP_COS
544 | R500_ALPHA_ADDRD(dest)
545 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
546 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
547 | R500_ALU_RGBA_ADDRD(dest);
548 break;
549 case OPCODE_DP3:
550 src[0] = make_src(fp, fpi->SrcReg[0]);
551 src[1] = make_src(fp, fpi->SrcReg[1]);
552 emit_alu(fp, counter, fpi);
553 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
554 | R500_RGB_ADDR1(src[1]);
555 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
556 | R500_ALPHA_ADDR1(src[1]);
557 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
558 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
559 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
560 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
561 | R500_ALPHA_ADDRD(dest)
562 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
563 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
564 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
565 | R500_ALU_RGBA_ADDRD(dest);
566 break;
567 case OPCODE_DP4:
568 src[0] = make_src(fp, fpi->SrcReg[0]);
569 src[1] = make_src(fp, fpi->SrcReg[1]);
570 /* Based on DP3 */
571 emit_alu(fp, counter, fpi);
572 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
573 | R500_RGB_ADDR1(src[1]);
574 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
575 | R500_ALPHA_ADDR1(src[1]);
576 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
577 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
578 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
579 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
580 | R500_ALPHA_ADDRD(dest)
581 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
582 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
583 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
584 | R500_ALU_RGBA_ADDRD(dest);
585 break;
586 case OPCODE_DPH:
587 src[0] = make_src(fp, fpi->SrcReg[0]);
588 src[1] = make_src(fp, fpi->SrcReg[1]);
589 /* Based on DP3 */
590 emit_alu(fp, counter, fpi);
591 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
592 | R500_RGB_ADDR1(src[1]);
593 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
594 | R500_ALPHA_ADDR1(src[1]);
595 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
596 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
597 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
598 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
599 | R500_ALPHA_ADDRD(dest)
600 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
601 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
602 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
603 | R500_ALU_RGBA_ADDRD(dest);
604 break;
605 case OPCODE_DST:
606 src[0] = make_src(fp, fpi->SrcReg[0]);
607 src[1] = make_src(fp, fpi->SrcReg[1]);
608 /* [1, src0.y*src1.y, src0.z, src1.w]
609 * So basically MUL with lotsa swizzling. */
610 emit_alu(fp, counter, fpi);
611 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
612 | R500_RGB_ADDR1(src[1]);
613 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
614 | R500_ALPHA_ADDR1(src[1]);
615 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
616 | R500_ALU_RGB_SEL_B_SRC1;
617 /* Select [1, y, z, 1] */
618 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE;
619 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz);
620 /* Select [1, y, 1, w] */
621 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6);
622 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz);
623 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
624 | R500_ALPHA_ADDRD(dest)
625 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
626 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
627 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
628 | R500_ALU_RGBA_ADDRD(dest)
629 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
630 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
631 break;
632 case OPCODE_EX2:
633 src[0] = make_src(fp, fpi->SrcReg[0]);
634 emit_alu(fp, counter, fpi);
635 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
636 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
637 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
638 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
639 fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2
640 | R500_ALPHA_ADDRD(dest)
641 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
642 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
643 | R500_ALU_RGBA_ADDRD(dest);
644 break;
645 case OPCODE_FRC:
646 src[0] = make_src(fp, fpi->SrcReg[0]);
647 emit_alu(fp, counter, fpi);
648 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
649 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
650 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
651 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
652 fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC
653 | R500_ALPHA_ADDRD(dest)
654 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
655 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
656 | R500_ALU_RGBA_ADDRD(dest);
657 break;
658 case OPCODE_LG2:
659 src[0] = make_src(fp, fpi->SrcReg[0]);
660 emit_alu(fp, counter, fpi);
661 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
662 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
663 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
664 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
665 fp->inst[counter].inst4 |= R500_ALPHA_OP_LN2
666 | R500_ALPHA_ADDRD(dest)
667 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
668 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
669 | R500_ALU_RGBA_ADDRD(dest);
670 break;
671 case OPCODE_LRP:
672 /* src0 * src1 + INV(src0) * src2
673 * 1) MUL src0, src1, temp
674 * 2) PRE 1-src0; MAD srcp, src2, temp */
675 src[0] = make_src(fp, fpi->SrcReg[0]);
676 src[1] = make_src(fp, fpi->SrcReg[1]);
677 src[2] = make_src(fp, fpi->SrcReg[2]);
678 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
679 | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11);
680 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
681 | R500_RGB_ADDR1(src[1]);
682 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
683 | R500_ALPHA_ADDR1(src[1]);
684 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
685 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
686 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
687 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
688 | R500_ALPHA_ADDRD(get_temp(fp, 0))
689 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
690 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
691 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
692 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
693 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
694 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
695 counter++;
696 emit_alu(fp, counter, fpi);
697 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
698 | R500_RGB_ADDR1(src[2])
699 | R500_RGB_ADDR2(get_temp(fp, 0))
700 | R500_RGB_SRCP_OP_1_MINUS_RGB0;
701 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
702 | R500_ALPHA_ADDR1(src[2])
703 | R500_ALPHA_ADDR2(get_temp(fp, 0))
704 | R500_ALPHA_SRCP_OP_1_MINUS_A0;
705 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP
706 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
707 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
708 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
709 | R500_ALPHA_ADDRD(dest)
710 | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
711 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
712 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
713 | R500_ALU_RGBA_ADDRD(dest)
714 | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
715 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
716 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
717 break;
718 case OPCODE_MAD:
719 emit_mad(fp, counter, fpi, 0, 1, 2);
720 break;
721 case OPCODE_MAX:
722 src[0] = make_src(fp, fpi->SrcReg[0]);
723 src[1] = make_src(fp, fpi->SrcReg[1]);
724 emit_alu(fp, counter, fpi);
725 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
726 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
727 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
728 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
729 | R500_ALU_RGB_SEL_B_SRC1
730 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
731 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAX
732 | R500_ALPHA_ADDRD(dest)
733 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
734 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
735 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
736 | R500_ALU_RGBA_ADDRD(dest);
737 break;
738 case OPCODE_MIN:
739 src[0] = make_src(fp, fpi->SrcReg[0]);
740 src[1] = make_src(fp, fpi->SrcReg[1]);
741 emit_alu(fp, counter, fpi);
742 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
743 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
744 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
745 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
746 | R500_ALU_RGB_SEL_B_SRC1
747 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
748 fp->inst[counter].inst4 |= R500_ALPHA_OP_MIN
749 | R500_ALPHA_ADDRD(dest)
750 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
751 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
752 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
753 | R500_ALU_RGBA_ADDRD(dest);
754 break;
755 case OPCODE_MOV:
756 emit_alu(fp, counter, fpi);
757 emit_mov(fp, counter, fpi->SrcReg[0], dest);
758 break;
759 case OPCODE_MUL:
760 /* Variation on MAD: src0*src1+0 */
761 emit_mad(fp, counter, fpi, 0, 1, R500_SWIZZLE_ZERO);
762 break;
763 case OPCODE_POW:
764 /* POW(a,b) = EX2(LN2(a)*b) */
765 src[0] = make_src(fp, fpi->SrcReg[0]);
766 src[1] = make_src(fp, fpi->SrcReg[1]);
767 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
768 | (R500_WRITEMASK_ARGB << 11);
769 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
770 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
771 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
772 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
773 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
774 | R500_ALPHA_ADDRD(get_temp(fp, 0))
775 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
776 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
777 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
778 counter++;
779 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
780 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0))
781 | R500_RGB_ADDR1(src[1]);
782 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0))
783 | R500_ALPHA_ADDR1(src[1]);
784 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
785 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
786 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
787 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
788 | R500_ALPHA_ADDRD(get_temp(fp, 1))
789 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
790 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
791 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
792 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
793 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
794 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
795 counter++;
796 emit_alu(fp, counter, fpi);
797 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
798 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
799 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
800 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
801 fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2
802 | R500_ALPHA_ADDRD(dest)
803 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
804 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
805 | R500_ALU_RGBA_ADDRD(dest);
806 break;
807 case OPCODE_RCP:
808 src[0] = make_src(fp, fpi->SrcReg[0]);
809 emit_alu(fp, counter, fpi);
810 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
811 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
812 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
813 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
814 fp->inst[counter].inst4 |= R500_ALPHA_OP_RCP
815 | R500_ALPHA_ADDRD(dest)
816 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
817 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
818 | R500_ALU_RGBA_ADDRD(dest);
819 break;
820 case OPCODE_RSQ:
821 src[0] = make_src(fp, fpi->SrcReg[0]);
822 emit_alu(fp, counter, fpi);
823 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
824 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
825 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
826 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
827 fp->inst[counter].inst4 |= R500_ALPHA_OP_RSQ
828 | R500_ALPHA_ADDRD(dest)
829 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
830 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
831 | R500_ALU_RGBA_ADDRD(dest);
832 break;
833 case OPCODE_SCS:
834 src[0] = make_src(fp, fpi->SrcReg[0]);
835 src[1] = emit_const4fv(fp, RCP_2PI);
836 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
837 | (R500_WRITEMASK_ARGB << 11);
838 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
839 | R500_RGB_ADDR1(src[1]);
840 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
841 | R500_ALPHA_ADDR1(src[1]);
842 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
843 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
844 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
845 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
846 | R500_ALPHA_ADDRD(get_temp(fp, 0))
847 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
848 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
849 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
850 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
851 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
852 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
853 counter++;
854 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
855 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
856 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
857 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
858 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
859 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
860 | R500_ALPHA_ADDRD(get_temp(fp, 1))
861 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
862 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
863 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
864 counter++;
865 /* Do a cosine, then a sine, masking out the channels we want to protect. */
866 /* Cosine only goes in R (x) channel. */
867 fpi->DstReg.WriteMask = 0x1;
868 emit_alu(fp, counter, fpi);
869 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
870 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
871 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
872 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
873 fp->inst[counter].inst4 |= R500_ALPHA_OP_COS
874 | R500_ALPHA_ADDRD(dest)
875 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
876 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
877 | R500_ALU_RGBA_ADDRD(dest);
878 counter++;
879 /* Sine only goes in G (y) channel. */
880 fpi->DstReg.WriteMask = 0x2;
881 emit_alu(fp, counter, fpi);
882 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
883 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
884 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
885 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
886 fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN
887 | R500_ALPHA_ADDRD(dest)
888 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
889 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
890 | R500_ALU_RGBA_ADDRD(dest);
891 break;
892 case OPCODE_SGE:
893 src[0] = make_src(fp, fpi->SrcReg[0]);
894 src[1] = make_src(fp, fpi->SrcReg[1]);
895 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
896 | (R500_WRITEMASK_ARGB << 11);
897 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
898 | R500_RGB_ADDR2(src[1]);
899 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
900 | R500_ALPHA_ADDR2(src[1]);
901 fp->inst[counter].inst3 = /* 1 */
902 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
903 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
904 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
905 | R500_ALPHA_ADDRD(get_temp(fp, 0))
906 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
907 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
908 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
909 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
910 | R500_ALU_RGBA_SEL_C_SRC2
911 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
912 | R500_ALU_RGBA_MOD_C_NEG
913 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
914 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
915 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
916 counter++;
917 /* This inst's selects need to be swapped as follows:
918 * 0 -> C ; 1 -> B ; 2 -> A */
919 emit_alu(fp, counter, fpi);
920 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
921 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
922 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
923 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
924 | R500_ALU_RGB_SEL_B_SRC0
925 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
926 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
927 | R500_ALPHA_ADDRD(dest)
928 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
929 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
930 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
931 | R500_ALU_RGBA_ADDRD(dest)
932 | R500_ALU_RGBA_SEL_C_SRC0
933 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
934 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
935 | R500_ALU_RGBA_A_SWIZ_A;
936 break;
937 case OPCODE_SIN:
938 src[0] = make_src(fp, fpi->SrcReg[0]);
939 src[1] = emit_const4fv(fp, RCP_2PI);
940 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
941 | (R500_WRITEMASK_ARGB << 11);
942 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
943 | R500_RGB_ADDR1(src[1]);
944 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
945 | R500_ALPHA_ADDR1(src[1]);
946 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
947 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
948 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
949 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
950 | R500_ALPHA_ADDRD(get_temp(fp, 0))
951 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
952 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
953 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
954 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
955 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
956 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
957 counter++;
958 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
959 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
960 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
961 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
962 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
963 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
964 | R500_ALPHA_ADDRD(get_temp(fp, 1))
965 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
966 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
967 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
968 counter++;
969 emit_alu(fp, counter, fpi);
970 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
971 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
972 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
973 fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN
974 | R500_ALPHA_ADDRD(dest)
975 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
976 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
977 | R500_ALU_RGBA_ADDRD(dest);
978 break;
979 case OPCODE_SLT:
980 src[0] = make_src(fp, fpi->SrcReg[0]);
981 src[1] = make_src(fp, fpi->SrcReg[1]);
982 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
983 | (R500_WRITEMASK_ARGB << 11);
984 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
985 | R500_RGB_ADDR2(src[1]);
986 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
987 | R500_ALPHA_ADDR2(src[1]);
988 fp->inst[counter].inst3 = /* 1 */
989 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
990 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
991 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
992 | R500_ALPHA_ADDRD(get_temp(fp, 0))
993 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
994 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
995 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
996 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
997 | R500_ALU_RGBA_SEL_C_SRC2
998 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
999 | R500_ALU_RGBA_MOD_C_NEG
1000 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1001 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1002 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1003 counter++;
1004 /* This inst's selects need to be swapped as follows:
1005 * 0 -> C ; 1 -> B ; 2 -> A */
1006 emit_alu(fp, counter, fpi);
1007 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1008 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1009 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1010 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
1011 | R500_ALU_RGB_SEL_B_SRC0
1012 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
1013 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
1014 | R500_ALPHA_ADDRD(dest)
1015 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
1016 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
1017 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
1018 | R500_ALU_RGBA_ADDRD(dest)
1019 | R500_ALU_RGBA_SEL_C_SRC0
1020 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1021 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1022 | R500_ALU_RGBA_A_SWIZ_A;
1023 break;
1024 case OPCODE_SUB:
1025 /* Variation on MAD: 1*src0-src1 */
1026 fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */
1027 emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
1028 break;
1029 case OPCODE_SWZ:
1030 /* TODO: The rarer negation masks! */
1031 emit_alu(fp, counter, fpi);
1032 emit_mov(fp, counter, fpi->SrcReg[0], dest);
1033 break;
1034 case OPCODE_KIL:
1035 case OPCODE_TEX:
1036 case OPCODE_TXB:
1037 case OPCODE_TXP:
1038 emit_tex(fp, fpi, dest, counter);
1039 if (fpi->DstReg.File == PROGRAM_OUTPUT)
1040 counter++;
1041 break;
1042 default:
1043 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
1044 break;
1045 }
1046
1047 /* Finishing touches */
1048 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
1049 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
1050 }
1051
1052 counter++;
1053
1054 if (fp->error)
1055 return GL_FALSE;
1056
1057 }
1058
1059 /* Finish him! (If it's an ALU/OUT instruction...) */
1060 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
1061 fp->inst[counter-1].inst0 |= R500_INST_LAST;
1062 } else {
1063 /* We still need to put an output inst, right? */
1064 WARN_ONCE("Final FP instruction is not an OUT.\n");
1065 }
1066
1067 fp->cs->nrslots = counter;
1068
1069 fp->max_temp_idx++;
1070
1071 return GL_TRUE;
1072 }
1073
1074 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
1075 {
1076 struct r300_pfs_compile_state *cs = NULL;
1077 struct gl_fragment_program *mp = &fp->mesa_program;
1078 struct prog_instruction *fpi;
1079 GLuint InputsRead = mp->Base.InputsRead;
1080 GLuint temps_used = 0;
1081 int i, j;
1082
1083 /* New compile, reset tracking data */
1084 fp->optimization =
1085 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
1086 fp->translated = GL_FALSE;
1087 fp->error = GL_FALSE;
1088 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
1089 fp->cur_node = 0;
1090 fp->first_node_has_tex = 0;
1091 fp->const_nr = 0;
1092 /* Size of pixel stack, plus 1. */
1093 fp->max_temp_idx = 1;
1094 /* Temp register offset. */
1095 fp->temp_reg_offset = 0;
1096 fp->node[0].alu_end = -1;
1097 fp->node[0].tex_end = -1;
1098
1099 _mesa_memset(cs, 0, sizeof(*fp->cs));
1100 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
1101 for (j = 0; j < 3; j++) {
1102 cs->slot[i].vsrc[j] = SRC_CONST;
1103 cs->slot[i].ssrc[j] = SRC_CONST;
1104 }
1105 }
1106
1107 /* Work out what temps the Mesa inputs correspond to, this must match
1108 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1109 * configures itself based on the fragprog's InputsRead
1110 *
1111 * NOTE: this depends on get_hw_temp() allocating registers in order,
1112 * starting from register 0, so we're just going to do that instead.
1113 */
1114
1115 /* Texcoords come first */
1116 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
1117 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1118 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
1119 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
1120 fp->temp_reg_offset;
1121 fp->temp_reg_offset++;
1122 }
1123 }
1124 InputsRead &= ~FRAG_BITS_TEX_ANY;
1125
1126 /* fragment position treated as a texcoord */
1127 if (InputsRead & FRAG_BIT_WPOS) {
1128 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
1129 cs->inputs[FRAG_ATTRIB_WPOS].reg =
1130 fp->temp_reg_offset;
1131 fp->temp_reg_offset++;
1132 }
1133 InputsRead &= ~FRAG_BIT_WPOS;
1134
1135 /* Then primary colour */
1136 if (InputsRead & FRAG_BIT_COL0) {
1137 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
1138 cs->inputs[FRAG_ATTRIB_COL0].reg =
1139 fp->temp_reg_offset;
1140 fp->temp_reg_offset++;
1141 }
1142 InputsRead &= ~FRAG_BIT_COL0;
1143
1144 /* Secondary color */
1145 if (InputsRead & FRAG_BIT_COL1) {
1146 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
1147 cs->inputs[FRAG_ATTRIB_COL1].reg =
1148 fp->temp_reg_offset;
1149 fp->temp_reg_offset++;
1150 }
1151 InputsRead &= ~FRAG_BIT_COL1;
1152
1153 /* Anything else */
1154 if (InputsRead) {
1155 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
1156 /* force read from hwreg 0 for now */
1157 for (i = 0; i < 32; i++)
1158 if (InputsRead & (1 << i))
1159 cs->inputs[i].reg = 0;
1160 }
1161
1162 if (!mp->Base.Instructions) {
1163 ERROR("No instructions found in program, going to go die now.\n");
1164 return;
1165 }
1166
1167 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
1168 for (i = 0; i < 3; i++) {
1169 if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
1170 if (fpi->SrcReg[i].Index > temps_used)
1171 temps_used = fpi->SrcReg[i].Index;
1172 }
1173 }
1174 }
1175
1176 cs->temp_in_use = temps_used;
1177
1178 fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use + 1;
1179 }
1180
1181 static void update_params(struct r500_fragment_program *fp)
1182 {
1183 struct gl_fragment_program *mp = &fp->mesa_program;
1184
1185 /* Ask Mesa nicely to fill in ParameterValues for us */
1186 if (mp->Base.Parameters)
1187 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1188 }
1189
1190 static void dumb_shader(struct r500_fragment_program *fp)
1191 {
1192 fp->inst[0].inst0 = R500_INST_TYPE_TEX
1193 | R500_INST_TEX_SEM_WAIT
1194 | R500_INST_RGB_WMASK_R
1195 | R500_INST_RGB_WMASK_G
1196 | R500_INST_RGB_WMASK_B
1197 | R500_INST_ALPHA_WMASK
1198 | R500_INST_RGB_CLAMP
1199 | R500_INST_ALPHA_CLAMP;
1200 fp->inst[0].inst1 = R500_TEX_ID(0)
1201 | R500_TEX_INST_LD
1202 | R500_TEX_SEM_ACQUIRE
1203 | R500_TEX_IGNORE_UNCOVERED;
1204 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
1205 | R500_TEX_SRC_S_SWIZ_R
1206 | R500_TEX_SRC_T_SWIZ_G
1207 | R500_TEX_DST_ADDR(0)
1208 | R500_TEX_DST_R_SWIZ_R
1209 | R500_TEX_DST_G_SWIZ_G
1210 | R500_TEX_DST_B_SWIZ_B
1211 | R500_TEX_DST_A_SWIZ_A;
1212 fp->inst[0].inst3 = R500_DX_ADDR(0)
1213 | R500_DX_S_SWIZ_R
1214 | R500_DX_T_SWIZ_R
1215 | R500_DX_R_SWIZ_R
1216 | R500_DX_Q_SWIZ_R
1217 | R500_DY_ADDR(0)
1218 | R500_DY_S_SWIZ_R
1219 | R500_DY_T_SWIZ_R
1220 | R500_DY_R_SWIZ_R
1221 | R500_DY_Q_SWIZ_R;
1222 fp->inst[0].inst4 = 0x0;
1223 fp->inst[0].inst5 = 0x0;
1224
1225 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
1226 R500_INST_TEX_SEM_WAIT |
1227 R500_INST_LAST |
1228 R500_INST_RGB_OMASK_R |
1229 R500_INST_RGB_OMASK_G |
1230 R500_INST_RGB_OMASK_B |
1231 R500_INST_ALPHA_OMASK;
1232 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
1233 R500_RGB_ADDR1(0) |
1234 R500_RGB_ADDR1_CONST |
1235 R500_RGB_ADDR2(0) |
1236 R500_RGB_ADDR2_CONST |
1237 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
1238 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
1239 R500_ALPHA_ADDR1(0) |
1240 R500_ALPHA_ADDR1_CONST |
1241 R500_ALPHA_ADDR2(0) |
1242 R500_ALPHA_ADDR2_CONST |
1243 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
1244 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
1245 R500_ALU_RGB_R_SWIZ_A_R |
1246 R500_ALU_RGB_G_SWIZ_A_G |
1247 R500_ALU_RGB_B_SWIZ_A_B |
1248 R500_ALU_RGB_SEL_B_SRC0 |
1249 R500_ALU_RGB_R_SWIZ_B_1 |
1250 R500_ALU_RGB_B_SWIZ_B_1 |
1251 R500_ALU_RGB_G_SWIZ_B_1;
1252 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
1253 R500_ALPHA_SWIZ_A_A |
1254 R500_ALPHA_SWIZ_B_1;
1255 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
1256 R500_ALU_RGBA_R_SWIZ_0 |
1257 R500_ALU_RGBA_G_SWIZ_0 |
1258 R500_ALU_RGBA_B_SWIZ_0 |
1259 R500_ALU_RGBA_A_SWIZ_0;
1260
1261 fp->cs->nrslots = 2;
1262 fp->translated = GL_TRUE;
1263 }
1264
1265 void r500TranslateFragmentShader(r300ContextPtr r300,
1266 struct r500_fragment_program *fp)
1267 {
1268
1269 struct r300_pfs_compile_state *cs = NULL;
1270
1271 if (!fp->translated) {
1272
1273 init_program(r300, fp);
1274 cs = fp->cs;
1275
1276 if (parse_program(fp) == GL_FALSE) {
1277 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1278 dumb_shader(fp);
1279 fp->inst_offset = 0;
1280 fp->inst_end = cs->nrslots - 1;
1281 return;
1282 }
1283 fp->inst_offset = 0;
1284 fp->inst_end = cs->nrslots - 1;
1285
1286 fp->translated = GL_TRUE;
1287 if (RADEON_DEBUG & DEBUG_PIXEL) {
1288 fprintf(stderr, "Mesa program:\n");
1289 fprintf(stderr, "-------------\n");
1290 _mesa_print_program(&fp->mesa_program.Base);
1291 fflush(stdout);
1292 dump_program(fp);
1293 }
1294
1295
1296 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1297 }
1298
1299 update_params(fp);
1300
1301 }
1302
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \param swiz_val selector value.
 * \return the name for values 0..7, NULL for anything else.
 */
static char *toswiz(int swiz_val)
{
	static char *const swizzle_names[8] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;

	return swizzle_names[swiz_val];
}
1316
/**
 * Map an RGBA opcode field to its printable mnemonic.
 *
 * The caller extracts a 4-bit field, so values 13..15 can reach this
 * function even though only 0..12 are named. Those (and any other
 * unlisted value) yield "Unknown" instead of an uninitialised pointer.
 *
 * \param op_val opcode field value.
 * \return a non-NULL, statically allocated string.
 */
static char *toop(int op_val)
{
	char *str;

	switch (op_val) {
	case 0: str = "MAD"; break;
	case 1: str = "DP3"; break;
	case 2: str = "DP4"; break;
	case 3: str = "D2A"; break;
	case 4: str = "MIN"; break;
	case 5: str = "MAX"; break;
	case 6: str = "Reserved"; break;
	case 7: str = "CND"; break;
	case 8: str = "CMP"; break;
	case 9: str = "FRC"; break;
	case 10: str = "SOP"; break;
	case 11: str = "MDH"; break;
	case 12: str = "MDV"; break;
	default: str = "Unknown"; break;
	}
	return str;
}
1337
/**
 * Map a 4-bit alpha opcode field to its printable mnemonic.
 *
 * \param op_val opcode field value.
 * \return the mnemonic for values 0..15, NULL for anything else.
 */
static char *to_alpha_op(int op_val)
{
	static char *const alpha_op_names[16] = {
		"MAD", "DP", "MIN", "MAX",
		"Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ",
		"SIN", "COS", "MDH", "MDV"
	};

	if (op_val < 0 || op_val > 15)
		return NULL;

	return alpha_op_names[op_val];
}
1361
/**
 * Map a 4-bit channel mask to a printable channel list.
 *
 * Bit 0 is R, bit 1 is G, bit 2 is B and bit 3 is A; alpha is listed first
 * in the combined names.
 *
 * \param val mask value.
 * \return the channel list for values 0..15, NULL for anything else.
 */
static char *to_mask(int val)
{
	static char *const mask_names[16] = {
		"NONE", "R", "G", "RG",
		"B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG",
		"AB", "ARB", "AGB", "ARGB"
	};

	if (val < 0 || val > 15)
		return NULL;

	return mask_names[val];
}
1385
/**
 * Map a texture opcode field to its printable mnemonic.
 *
 * The caller extracts a 3-bit field, so value 7 can reach this function
 * even though only 0..6 are named. It (and any other unlisted value)
 * yields "Unknown" rather than NULL, because the result is passed
 * straight to a "%s" conversion.
 *
 * \param val texture opcode field value.
 * \return a non-NULL, statically allocated string.
 */
static char *to_texop(int val)
{
	switch(val) {
	case 0: return "NOP";
	case 1: return "LD";
	case 2: return "TEXKILL";
	case 3: return "PROJ";
	case 4: return "LODBIAS";
	case 5: return "LOD";
	case 6: return "DXDY";
	default: break;
	}
	return "Unknown";
}
1399
1400 static void dump_program(struct r500_fragment_program *fp)
1401 {
1402 int pc = 0;
1403 int n;
1404 uint32_t inst;
1405 uint32_t inst0;
1406 char *str = NULL;
1407
1408 for (n = 0; n < fp->inst_end+1; n++) {
1409 inst0 = inst = fp->inst[n].inst0;
1410 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1411 switch(inst & 0x3) {
1412 case R500_INST_TYPE_ALU: str = "ALU"; break;
1413 case R500_INST_TYPE_OUT: str = "OUT"; break;
1414 case R500_INST_TYPE_FC: str = "FC"; break;
1415 case R500_INST_TYPE_TEX: str = "TEX"; break;
1416 };
1417 fprintf(stderr,"%s %s %s %s %s ", str,
1418 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1419 inst & R500_INST_LAST ? "LAST" : "",
1420 inst & R500_INST_NOP ? "NOP" : "",
1421 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1422 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1423 to_mask((inst >> 15) & 0xf));
1424
1425 switch(inst0 & 0x3) {
1426 case 0:
1427 case 1:
1428 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1429 inst = fp->inst[n].inst1;
1430
1431 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1432 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1433 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1434 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1435 (inst >> 30));
1436
1437 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1438 inst = fp->inst[n].inst2;
1439 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1440 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1441 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1442 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1443 (inst >> 30));
1444 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1445 inst = fp->inst[n].inst3;
1446 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1447 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1448 (inst >> 11) & 0x3,
1449 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1450 (inst >> 24) & 0x3);
1451
1452
1453 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1454 inst = fp->inst[n].inst4;
1455 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
1456 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1457 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1458 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
1459 (inst >> 31) & 0x1);
1460
1461 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1462 inst = fp->inst[n].inst5;
1463 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1464 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1465 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1466 (inst >> 23) & 0x3,
1467 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1468 break;
1469 case 2:
1470 break;
1471 case 3:
1472 inst = fp->inst[n].inst1;
1473 fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
1474 to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
1475 (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
1476 inst = fp->inst[n].inst2;
1477 fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
1478 inst & 127, inst & (1<<7) ? "(rel)" : "",
1479 toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
1480 toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
1481 (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
1482 toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
1483 toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
1484
1485 fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3);
1486 break;
1487 }
1488 fprintf(stderr,"\n");
1489 }
1490
1491 }