81976573c6b881a2ec527273e68e9fc7b62fc54c
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 * \todo Depth write, WPOS/FOGC inputs
41 *
42 * \todo FogOption
43 *
44 * \todo Verify results of opcodes for accuracy, I've only checked them in
45 * specific cases.
46 */
47
48 #include "glheader.h"
49 #include "macros.h"
50 #include "enums.h"
51 #include "shader/prog_instruction.h"
52 #include "shader/prog_parameter.h"
53 #include "shader/prog_print.h"
54
55 #include "r300_context.h"
56 #include "r500_fragprog.h"
57 #include "r300_reg.h"
58 #include "r300_state.h"
59
60 /*
61 * Useful macros and values
62 */
/*
 * Print a diagnostic to stderr, prefixed with the current file and function
 * name, and mark the program being compiled as failed.
 *
 * Requires a variable named `fp` with an `error` member to be in scope at the
 * point of use. A newline is appended to the formatted message.
 */
#define ERROR(fmt, ...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
		fp->error = GL_TRUE; \
	} while (0)
68
/* Declares `cs`, the compile-state pointer taken from `fp`; `fp` must be in
 * scope where this is expanded. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* Limits checked when handing out temporary and constant registers. */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_DEST_REL 0x080
#define REG_CONSTANT 0x100
#define REG_SRC_REL 0x200

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Packed RGB swizzles: one 3-bit selector per channel, at bits 0, 3 and 6. */
#define R500_SWIZ_RGB_ZERO ((R500_SWIZZLE_ZERO << 0) \
	| (R500_SWIZZLE_ZERO << 3) \
	| (R500_SWIZZLE_ZERO << 6))
#define R500_SWIZ_RGB_ONE ((R500_SWIZZLE_ONE << 0) \
	| (R500_SWIZZLE_ONE << 3) \
	| (R500_SWIZZLE_ONE << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
/* Modifier codes placed just above the swizzle selectors. */
#define R500_SWIZ_MOD_NEG 1
#define R500_SWIZ_MOD_ABS 2
#define R500_SWIZ_MOD_NEG_ABS 3
/*
 * Field-placement helpers: each shifts a swizzle value to the bit position it
 * occupies in the named instruction word. The argument is parenthesized so
 * that a compound expression (e.g. `a | b`) is shifted as a whole.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)

/* Writemasks */
#define R500_WRITEMASK_G 0x2
#define R500_WRITEMASK_A 0x8
#define R500_WRITEMASK_ARGB 0xF
106
107 /* 1/(2pi), needed for quick modulus in trig insts
108 * Thanks to glisse for pointing out how to do it! */
109 static const GLfloat RCP_2PI[] = {0.15915494309189535,
110 0.15915494309189535,
111 0.15915494309189535,
112 0.15915494309189535};
113
114 static const GLfloat LIT[] = {127.999999,
115 127.999999,
116 127.999999,
117 -127.999999};
118
119 static void dump_program(struct r500_fragment_program *fp);
120
121 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
122 GLuint swiz = 0x0;
123 GLuint temp;
124 /* This could be optimized, but it should be plenty fast already. */
125 int i;
126 for (i = 0; i < 3; i++) {
127 temp = GET_SWZ(src.Swizzle, i);
128 /* Fix SWIZZLE_ONE */
129 if (temp == 5) temp++;
130 swiz |= temp << i*3;
131 }
132 if (src.NegateBase)
133 swiz |= (R500_SWIZ_MOD_NEG << 9);
134 return swiz;
135 }
136
137 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
138 GLuint swiz = GET_SWZ(src.Swizzle, 3);
139
140 if (swiz == 5) swiz++;
141
142 if (src.NegateBase)
143 swiz |= (R500_SWIZ_MOD_NEG << 3);
144
145 return swiz;
146 }
147
148 static inline GLuint make_sop_swizzle(struct prog_src_register src) {
149 GLuint swiz = GET_SWZ(src.Swizzle, 0);
150
151 if (swiz == 5) swiz++;
152 return swiz;
153 }
154
155 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
156 GLuint swiz = 0x0, temp = 0x0;
157 int i;
158 for (i = 0; i < 4; i++) {
159 temp = GET_SWZ(src.Swizzle, i) & 0x3;
160 swiz |= temp << i*2;
161 }
162 return swiz;
163 }
164
165 static int get_temp(struct r500_fragment_program *fp, int slot) {
166
167 COMPILE_STATE;
168
169 int r = cs->temp_in_use + 1 + slot;
170
171 if (r > R500_US_NUM_TEMP_REGS) {
172 ERROR("Too many temporary registers requested, can't compile!\n");
173 }
174
175 return r;
176 }
177
178 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
179 static GLuint emit_const4fv(struct r500_fragment_program *fp,
180 const GLfloat * cp)
181 {
182 GLuint reg = 0x0;
183 int index;
184
185 for (index = 0; index < fp->const_nr; ++index) {
186 if (fp->constant[index] == cp)
187 break;
188 }
189
190 if (index >= fp->const_nr) {
191 if (index >= R500_US_NUM_CONST_REGS) {
192 ERROR("Out of hw constants!\n");
193 return reg;
194 }
195
196 fp->const_nr++;
197 fp->constant[index] = cp;
198 }
199
200 reg = index | REG_CONSTANT;
201 return reg;
202 }
203
204 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
205 COMPILE_STATE;
206 GLuint reg;
207 switch (src.File) {
208 case PROGRAM_TEMPORARY:
209 reg = src.Index + fp->temp_reg_offset;
210 break;
211 case PROGRAM_INPUT:
212 reg = cs->inputs[src.Index].reg;
213 break;
214 case PROGRAM_LOCAL_PARAM:
215 reg = emit_const4fv(fp,
216 fp->mesa_program.Base.LocalParams[src.
217 Index]);
218 break;
219 case PROGRAM_ENV_PARAM:
220 reg = emit_const4fv(fp,
221 fp->ctx->FragmentProgram.Parameters[src.
222 Index]);
223 break;
224 case PROGRAM_STATE_VAR:
225 case PROGRAM_NAMED_PARAM:
226 case PROGRAM_CONSTANT:
227 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
228 ParameterValues[src.Index]);
229 break;
230 default:
231 ERROR("Can't handle src.File %x\n", src.File);
232 reg = 0x0;
233 break;
234 }
235 return reg;
236 }
237
238 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
239 GLuint reg;
240 switch (dest.File) {
241 case PROGRAM_TEMPORARY:
242 reg = dest.Index + fp->temp_reg_offset;
243 break;
244 case PROGRAM_OUTPUT:
245 /* Eventually we may need to handle multiple
246 * rendering targets... */
247 reg = dest.Index;
248 break;
249 default:
250 ERROR("Can't handle dest.File %x\n", dest.File);
251 reg = 0x0;
252 break;
253 }
254 return reg;
255 }
256
257 static void emit_tex(struct r500_fragment_program *fp,
258 struct prog_instruction *fpi, int dest, int counter)
259 {
260 int hwsrc, hwdest;
261 GLuint mask;
262
263 mask = fpi->DstReg.WriteMask << 11;
264 hwsrc = make_src(fp, fpi->SrcReg[0]);
265
266 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
267 hwdest = get_temp(fp, 0);
268 } else {
269 hwdest = dest;
270 }
271
272 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
273 | R500_INST_TEX_SEM_WAIT;
274
275 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
276 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
277
278 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
279 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
280
281 switch (fpi->Opcode) {
282 case OPCODE_KIL:
283 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
284 break;
285 case OPCODE_TEX:
286 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
287 break;
288 case OPCODE_TXB:
289 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
290 break;
291 case OPCODE_TXP:
292 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
293 break;
294 default:
295 ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode);
296 }
297
298 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
299 | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0]))
300 /* | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
301 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A */
302 | R500_TEX_DST_ADDR(hwdest)
303 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
304 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
305
306 fp->inst[counter].inst3 = 0x0;
307 fp->inst[counter].inst4 = 0x0;
308 fp->inst[counter].inst5 = 0x0;
309
310 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
311 counter++;
312 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
313 | R500_INST_TEX_SEM_WAIT | (mask << 4);
314 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
315 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
316 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
317 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
318 | R500_ALU_RGB_SEL_B_SRC0
319 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
320 | R500_ALU_RGB_OMOD_DISABLE;
321 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
322 | R500_ALPHA_ADDRD(dest)
323 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
324 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
325 | R500_ALPHA_OMOD_DISABLE;
326 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
327 | R500_ALU_RGBA_ADDRD(dest)
328 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
329 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
330 }
331 }
332
333 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
334 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
335 fp->inst[counter].inst0 = R500_INST_TYPE_OUT;
336
337 if (fpi->DstReg.Index == FRAG_RESULT_COLR)
338 fp->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15);
339
340 if (fpi->DstReg.Index == FRAG_RESULT_DEPR)
341 fp->inst[counter].inst4 = R500_ALPHA_W_OMASK;
342 } else {
343 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
344 /* pixel_mask */
345 | (fpi->DstReg.WriteMask << 11);
346 }
347
348 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
349
350 /* Ideally, we shouldn't have to explicitly clear memory here! */
351 fp->inst[counter].inst1 = 0x0;
352 fp->inst[counter].inst2 = 0x0;
353 fp->inst[counter].inst3 = 0x0;
354 fp->inst[counter].inst5 = 0x0;
355 }
356
357 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
358 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
359 * it is technically more accurate and recommended by ATI/AMD. */
360 GLuint src_reg = make_src(fp, src);
361 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
362 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
363 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
364 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
365 | R500_ALU_RGB_SEL_B_SRC0
366 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
367 | R500_ALU_RGB_OMOD_DISABLE;
368 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
369 | R500_ALPHA_ADDRD(dest)
370 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
371 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
372 | R500_ALPHA_OMOD_DISABLE;
373 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
374 | R500_ALU_RGBA_ADDRD(dest)
375 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
376 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
377 }
378
379 static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int one, int two, int three) {
380 /* Note: This code was all Corbin's. Corbin is a rather hackish coder.
381 * If you can make it pretty or fast, please do so! */
382 emit_alu(fp, counter, fpi);
383 /* Common MAD stuff */
384 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
385 | R500_ALPHA_ADDRD(make_dest(fp, fpi->DstReg));
386 fp->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD
387 | R500_ALU_RGBA_ADDRD(make_dest(fp, fpi->DstReg));
388 switch (one) {
389 case 0:
390 case 1:
391 case 2:
392 fp->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(fp, fpi->SrcReg[one]));
393 fp->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(fp, fpi->SrcReg[one]));
394 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0
395 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one]));
396 fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0
397 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one]));
398 break;
399 case R500_SWIZZLE_ZERO:
400 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO);
401 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO);
402 break;
403 case R500_SWIZZLE_ONE:
404 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE);
405 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE);
406 break;
407 default:
408 ERROR("Bad src index in emit_mad: %d\n", one);
409 break;
410 }
411 switch (two) {
412 case 0:
413 case 1:
414 case 2:
415 fp->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(fp, fpi->SrcReg[two]));
416 fp->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(fp, fpi->SrcReg[two]));
417 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
418 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two]));
419 fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1
420 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two]));
421 break;
422 case R500_SWIZZLE_ZERO:
423 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
424 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
425 break;
426 case R500_SWIZZLE_ONE:
427 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
428 fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
429 break;
430 default:
431 ERROR("Bad src index in emit_mad: %d\n", two);
432 break;
433 }
434 switch (three) {
435 case 0:
436 case 1:
437 case 2:
438 fp->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(fp, fpi->SrcReg[three]));
439 fp->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(fp, fpi->SrcReg[three]));
440 fp->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2
441 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three]))
442 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
443 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three]));
444 break;
445 case R500_SWIZZLE_ZERO:
446 fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
447 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
448 break;
449 case R500_SWIZZLE_ONE:
450 fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE)
451 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE);
452 break;
453 default:
454 ERROR("Bad src index in emit_mad: %d\n", three);
455 break;
456 }
457 }
458
459 static GLboolean parse_program(struct r500_fragment_program *fp)
460 {
461 struct gl_fragment_program *mp = &fp->mesa_program;
462 const struct prog_instruction *inst = mp->Base.Instructions;
463 struct prog_instruction *fpi;
464 GLuint src[3], dest = 0;
465 int temp_swiz, counter = 0;
466
467 if (!inst || inst[0].Opcode == OPCODE_END) {
468 ERROR("The program is empty!\n");
469 return GL_FALSE;
470 }
471
472 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
473
474 if (fpi->Opcode != OPCODE_KIL) {
475 dest = make_dest(fp, fpi->DstReg);
476 }
477
478 switch (fpi->Opcode) {
479 case OPCODE_ABS:
480 emit_alu(fp, counter, fpi);
481 emit_mov(fp, counter, fpi->SrcReg[0], dest);
482 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
483 | R500_ALU_RGB_MOD_B_ABS;
484 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
485 | R500_ALPHA_MOD_B_ABS;
486 break;
487 case OPCODE_ADD:
488 /* Variation on MAD: 1*src0+src1 */
489 emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
490 break;
491 case OPCODE_CMP:
492 /* This inst's selects need to be swapped as follows:
493 * 0 -> C ; 1 -> B ; 2 -> A */
494 src[0] = make_src(fp, fpi->SrcReg[0]);
495 src[1] = make_src(fp, fpi->SrcReg[1]);
496 src[2] = make_src(fp, fpi->SrcReg[2]);
497 emit_alu(fp, counter, fpi);
498 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
499 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
500 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
501 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
502 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
503 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
504 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
505 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
506 | R500_ALPHA_ADDRD(dest)
507 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
508 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
509 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
510 | R500_ALU_RGBA_ADDRD(dest)
511 | R500_ALU_RGBA_SEL_C_SRC2
512 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
513 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
514 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
515 break;
516 case OPCODE_COS:
517 src[0] = make_src(fp, fpi->SrcReg[0]);
518 src[1] = emit_const4fv(fp, RCP_2PI);
519 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
520 | (R500_WRITEMASK_ARGB << 11);
521 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
522 | R500_RGB_ADDR1(src[1]);
523 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
524 | R500_ALPHA_ADDR1(src[1]);
525 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
526 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
527 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
528 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
529 | R500_ALPHA_ADDRD(get_temp(fp, 0))
530 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
531 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
532 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
533 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
534 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
535 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
536 counter++;
537 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
538 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
539 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
540 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
541 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
542 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
543 | R500_ALPHA_ADDRD(get_temp(fp, 1))
544 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
545 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
546 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
547 counter++;
548 emit_alu(fp, counter, fpi);
549 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
550 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
551 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
552 fp->inst[counter].inst4 |= R500_ALPHA_OP_COS
553 | R500_ALPHA_ADDRD(dest)
554 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
555 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
556 | R500_ALU_RGBA_ADDRD(dest);
557 break;
558 case OPCODE_DP3:
559 src[0] = make_src(fp, fpi->SrcReg[0]);
560 src[1] = make_src(fp, fpi->SrcReg[1]);
561 emit_alu(fp, counter, fpi);
562 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
563 | R500_RGB_ADDR1(src[1]);
564 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
565 | R500_ALPHA_ADDR1(src[1]);
566 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
567 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
568 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
569 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
570 | R500_ALPHA_ADDRD(dest)
571 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
572 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
573 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
574 | R500_ALU_RGBA_ADDRD(dest);
575 break;
576 case OPCODE_DP4:
577 src[0] = make_src(fp, fpi->SrcReg[0]);
578 src[1] = make_src(fp, fpi->SrcReg[1]);
579 /* Based on DP3 */
580 emit_alu(fp, counter, fpi);
581 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
582 | R500_RGB_ADDR1(src[1]);
583 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
584 | R500_ALPHA_ADDR1(src[1]);
585 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
586 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
587 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
588 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
589 | R500_ALPHA_ADDRD(dest)
590 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
591 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
592 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
593 | R500_ALU_RGBA_ADDRD(dest);
594 break;
595 case OPCODE_DPH:
596 src[0] = make_src(fp, fpi->SrcReg[0]);
597 src[1] = make_src(fp, fpi->SrcReg[1]);
598 /* Based on DP3 */
599 emit_alu(fp, counter, fpi);
600 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
601 | R500_RGB_ADDR1(src[1]);
602 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
603 | R500_ALPHA_ADDR1(src[1]);
604 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
605 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
606 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
607 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
608 | R500_ALPHA_ADDRD(dest)
609 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
610 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
611 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
612 | R500_ALU_RGBA_ADDRD(dest);
613 break;
614 case OPCODE_DST:
615 src[0] = make_src(fp, fpi->SrcReg[0]);
616 src[1] = make_src(fp, fpi->SrcReg[1]);
617 /* [1, src0.y*src1.y, src0.z, src1.w]
618 * So basically MUL with lotsa swizzling. */
619 emit_alu(fp, counter, fpi);
620 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
621 | R500_RGB_ADDR1(src[1]);
622 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
623 | R500_ALPHA_ADDR1(src[1]);
624 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
625 | R500_ALU_RGB_SEL_B_SRC1;
626 /* Select [1, y, z, 1] */
627 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE;
628 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz);
629 /* Select [1, y, 1, w] */
630 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6);
631 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz);
632 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
633 | R500_ALPHA_ADDRD(dest)
634 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
635 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
636 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
637 | R500_ALU_RGBA_ADDRD(dest)
638 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
639 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
640 break;
641 case OPCODE_EX2:
642 src[0] = make_src(fp, fpi->SrcReg[0]);
643 emit_alu(fp, counter, fpi);
644 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
645 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
646 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
647 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
648 fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2
649 | R500_ALPHA_ADDRD(dest)
650 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
651 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
652 | R500_ALU_RGBA_ADDRD(dest);
653 break;
654 case OPCODE_FRC:
655 src[0] = make_src(fp, fpi->SrcReg[0]);
656 emit_alu(fp, counter, fpi);
657 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
658 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
659 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
660 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
661 fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC
662 | R500_ALPHA_ADDRD(dest)
663 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
664 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
665 | R500_ALU_RGBA_ADDRD(dest);
666 break;
667 case OPCODE_LG2:
668 src[0] = make_src(fp, fpi->SrcReg[0]);
669 emit_alu(fp, counter, fpi);
670 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
671 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
672 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
673 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
674 fp->inst[counter].inst4 |= R500_ALPHA_OP_LN2
675 | R500_ALPHA_ADDRD(dest)
676 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
677 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
678 | R500_ALU_RGBA_ADDRD(dest);
679 break;
680 case OPCODE_LIT:
681 /* I think I've got a pretty good path through this.
682 * MAX temp1, tmp, [0, 0, 0, -128];
683 * MIN temp1.w, temp1.w, [128];
684 * POW temp1.z, temp1.y, temp1.w; (3 insts)
685 * MOV result.xyzw, [1, temp1.y, temp1.z, 1]; */
686 src[0] = make_src(fp, fpi->SrcReg[0]);
687 src[1] = emit_const4fv(fp, LIT);
688 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
689 | (R500_WRITEMASK_ARGB << 11);
690 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
691 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
692 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
693 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
694 | R500_ALU_RGB_SEL_B_SRC1
695 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
696 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
697 | R500_ALPHA_ADDRD(get_temp(fp, 0))
698 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
699 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
700 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
701 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
702 counter++;
703 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11);
704 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
705 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) | R500_ALPHA_ADDR1(src[1]);
706 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
707 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
708 | R500_ALU_RGB_SEL_B_SRC0
709 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
710 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
711 | R500_ALPHA_ADDRD(get_temp(fp, 0))
712 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
713 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A | R500_ALPHA_MOD_B_NEG;
714 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
715 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
716 counter++;
717 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
718 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
719 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
720 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
721 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
722 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
723 | R500_ALPHA_ADDRD(get_temp(fp, 1))
724 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_G;
725 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
726 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
727 counter++;
728 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
729 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0))
730 | R500_RGB_ADDR1(get_temp(fp, 1));
731 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0))
732 | R500_ALPHA_ADDR1(get_temp(fp, 1));
733 /* Select [w, w, w, w] */
734 temp_swiz = 3 | (3 << 3) | (3 << 6);
735 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
736 | MAKE_SWIZ_RGB_A(temp_swiz)
737 | R500_ALU_RGB_SEL_B_SRC1
738 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
739 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
740 | R500_ALPHA_ADDRD(get_temp(fp, 1))
741 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
742 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
743 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
744 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
745 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
746 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
747 counter++;
748 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_G << 11);
749 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
750 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
751 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
752 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
753 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
754 | R500_ALPHA_ADDRD(get_temp(fp, 0))
755 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
756 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
757 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
758 counter++;
759 emit_alu(fp, counter, fpi);
760 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
761 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
762 /* Select [1, y, z, 1] */
763 temp_swiz = R500_SWIZZLE_ONE | (2 << 3) | (3 << 6);
764 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
765 | MAKE_SWIZ_RGB_A(temp_swiz)
766 | R500_ALU_RGB_SEL_B_SRC0
767 | MAKE_SWIZ_RGB_B(temp_swiz)
768 | R500_ALU_RGB_OMOD_DISABLE;
769 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
770 | R500_ALPHA_ADDRD(dest)
771 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1
772 | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1
773 | R500_ALPHA_OMOD_DISABLE;
774 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
775 | R500_ALU_RGBA_ADDRD(dest)
776 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
777 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
778 break;
779 case OPCODE_LRP:
780 /* src0 * src1 + INV(src0) * src2
781 * 1) MUL src0, src1, temp
782 * 2) PRE 1-src0; MAD srcp, src2, temp */
783 src[0] = make_src(fp, fpi->SrcReg[0]);
784 src[1] = make_src(fp, fpi->SrcReg[1]);
785 src[2] = make_src(fp, fpi->SrcReg[2]);
786 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
787 | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11);
788 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
789 | R500_RGB_ADDR1(src[1]);
790 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
791 | R500_ALPHA_ADDR1(src[1]);
792 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
793 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
794 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
795 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
796 | R500_ALPHA_ADDRD(get_temp(fp, 0))
797 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
798 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
799 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
800 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
801 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
802 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
803 counter++;
804 emit_alu(fp, counter, fpi);
805 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
806 | R500_RGB_ADDR1(src[2])
807 | R500_RGB_ADDR2(get_temp(fp, 0))
808 | R500_RGB_SRCP_OP_1_MINUS_RGB0;
809 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
810 | R500_ALPHA_ADDR1(src[2])
811 | R500_ALPHA_ADDR2(get_temp(fp, 0))
812 | R500_ALPHA_SRCP_OP_1_MINUS_A0;
813 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP
814 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
815 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
816 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
817 | R500_ALPHA_ADDRD(dest)
818 | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
819 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
820 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
821 | R500_ALU_RGBA_ADDRD(dest)
822 | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
823 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
824 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
825 break;
826 case OPCODE_MAD:
827 emit_mad(fp, counter, fpi, 0, 1, 2);
828 break;
829 case OPCODE_MAX:
830 src[0] = make_src(fp, fpi->SrcReg[0]);
831 src[1] = make_src(fp, fpi->SrcReg[1]);
832 emit_alu(fp, counter, fpi);
833 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
834 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
835 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
836 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
837 | R500_ALU_RGB_SEL_B_SRC1
838 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
839 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAX
840 | R500_ALPHA_ADDRD(dest)
841 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
842 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
843 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
844 | R500_ALU_RGBA_ADDRD(dest);
845 break;
846 case OPCODE_MIN:
847 src[0] = make_src(fp, fpi->SrcReg[0]);
848 src[1] = make_src(fp, fpi->SrcReg[1]);
849 emit_alu(fp, counter, fpi);
850 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
851 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
852 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
853 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
854 | R500_ALU_RGB_SEL_B_SRC1
855 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
856 fp->inst[counter].inst4 |= R500_ALPHA_OP_MIN
857 | R500_ALPHA_ADDRD(dest)
858 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
859 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
860 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
861 | R500_ALU_RGBA_ADDRD(dest);
862 break;
863 case OPCODE_MOV:
864 emit_alu(fp, counter, fpi);
865 emit_mov(fp, counter, fpi->SrcReg[0], dest);
866 break;
867 case OPCODE_MUL:
868 /* Variation on MAD: src0*src1+0 */
869 emit_mad(fp, counter, fpi, 0, 1, R500_SWIZZLE_ZERO);
870 break;
871 case OPCODE_POW:
872 /* POW(a,b) = EX2(LN2(a)*b) */
873 src[0] = make_src(fp, fpi->SrcReg[0]);
874 src[1] = make_src(fp, fpi->SrcReg[1]);
875 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
876 | (R500_WRITEMASK_ARGB << 11);
877 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
878 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
879 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
880 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
881 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
882 | R500_ALPHA_ADDRD(get_temp(fp, 0))
883 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
884 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
885 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
886 counter++;
887 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
888 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0))
889 | R500_RGB_ADDR1(src[1]);
890 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0))
891 | R500_ALPHA_ADDR1(src[1]);
892 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
893 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
894 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
895 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
896 | R500_ALPHA_ADDRD(get_temp(fp, 1))
897 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
898 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
899 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
900 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
901 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
902 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
903 counter++;
904 emit_alu(fp, counter, fpi);
905 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
906 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
907 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
908 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
909 fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2
910 | R500_ALPHA_ADDRD(dest)
911 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
912 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
913 | R500_ALU_RGBA_ADDRD(dest);
914 break;
915 case OPCODE_RCP:
916 src[0] = make_src(fp, fpi->SrcReg[0]);
917 emit_alu(fp, counter, fpi);
918 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
919 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
920 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
921 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
922 fp->inst[counter].inst4 |= R500_ALPHA_OP_RCP
923 | R500_ALPHA_ADDRD(dest)
924 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
925 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
926 | R500_ALU_RGBA_ADDRD(dest);
927 break;
928 case OPCODE_RSQ:
929 src[0] = make_src(fp, fpi->SrcReg[0]);
930 emit_alu(fp, counter, fpi);
931 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
932 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
933 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
934 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
935 fp->inst[counter].inst4 |= R500_ALPHA_OP_RSQ
936 | R500_ALPHA_ADDRD(dest)
937 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
938 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
939 | R500_ALU_RGBA_ADDRD(dest);
940 break;
941 case OPCODE_SCS:
942 src[0] = make_src(fp, fpi->SrcReg[0]);
943 src[1] = emit_const4fv(fp, RCP_2PI);
944 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
945 | (R500_WRITEMASK_ARGB << 11);
946 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
947 | R500_RGB_ADDR1(src[1]);
948 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
949 | R500_ALPHA_ADDR1(src[1]);
950 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
951 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
952 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
953 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
954 | R500_ALPHA_ADDRD(get_temp(fp, 0))
955 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
956 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
957 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
958 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
959 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
960 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
961 counter++;
962 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
963 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
964 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
965 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
966 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
967 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
968 | R500_ALPHA_ADDRD(get_temp(fp, 1))
969 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
970 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
971 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
972 counter++;
973 /* Do a cosine, then a sine, masking out the channels we want to protect. */
974 /* Cosine only goes in R (x) channel. */
975 fpi->DstReg.WriteMask = 0x1;
976 emit_alu(fp, counter, fpi);
977 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
978 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
979 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
980 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
981 fp->inst[counter].inst4 |= R500_ALPHA_OP_COS
982 | R500_ALPHA_ADDRD(dest)
983 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
984 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
985 | R500_ALU_RGBA_ADDRD(dest);
986 counter++;
987 /* Sine only goes in G (y) channel. */
988 fpi->DstReg.WriteMask = 0x2;
989 emit_alu(fp, counter, fpi);
990 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
991 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
992 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
993 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
994 fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN
995 | R500_ALPHA_ADDRD(dest)
996 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
997 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
998 | R500_ALU_RGBA_ADDRD(dest);
999 break;
1000 case OPCODE_SGE:
1001 src[0] = make_src(fp, fpi->SrcReg[0]);
1002 src[1] = make_src(fp, fpi->SrcReg[1]);
1003 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1004 | (R500_WRITEMASK_ARGB << 11);
1005 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1006 | R500_RGB_ADDR2(src[1]);
1007 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1008 | R500_ALPHA_ADDR2(src[1]);
1009 fp->inst[counter].inst3 = /* 1 */
1010 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1011 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1012 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1013 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1014 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1015 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1016 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1017 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1018 | R500_ALU_RGBA_SEL_C_SRC2
1019 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1020 | R500_ALU_RGBA_MOD_C_NEG
1021 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1022 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1023 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1024 counter++;
1025 /* This inst's selects need to be swapped as follows:
1026 * 0 -> C ; 1 -> B ; 2 -> A */
1027 emit_alu(fp, counter, fpi);
1028 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1029 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1030 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1031 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1032 | R500_ALU_RGB_SEL_B_SRC0
1033 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
1034 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
1035 | R500_ALPHA_ADDRD(dest)
1036 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1037 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
1038 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
1039 | R500_ALU_RGBA_ADDRD(dest)
1040 | R500_ALU_RGBA_SEL_C_SRC0
1041 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1042 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1043 | R500_ALU_RGBA_A_SWIZ_A;
1044 break;
1045 case OPCODE_SIN:
1046 src[0] = make_src(fp, fpi->SrcReg[0]);
1047 src[1] = emit_const4fv(fp, RCP_2PI);
1048 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1049 | (R500_WRITEMASK_ARGB << 11);
1050 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
1051 | R500_RGB_ADDR1(src[1]);
1052 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
1053 | R500_ALPHA_ADDR1(src[1]);
1054 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1055 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
1056 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
1057 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1058 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1059 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
1060 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
1061 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1062 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1063 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
1064 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
1065 counter++;
1066 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
1067 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1068 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1069 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1070 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
1071 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
1072 | R500_ALPHA_ADDRD(get_temp(fp, 1))
1073 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
1074 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
1075 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
1076 counter++;
1077 emit_alu(fp, counter, fpi);
1078 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
1079 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
1080 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
1081 fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN
1082 | R500_ALPHA_ADDRD(dest)
1083 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
1084 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
1085 | R500_ALU_RGBA_ADDRD(dest);
1086 break;
1087 case OPCODE_SLT:
1088 src[0] = make_src(fp, fpi->SrcReg[0]);
1089 src[1] = make_src(fp, fpi->SrcReg[1]);
1090 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1091 | (R500_WRITEMASK_ARGB << 11);
1092 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1093 | R500_RGB_ADDR2(src[1]);
1094 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1095 | R500_ALPHA_ADDR2(src[1]);
1096 fp->inst[counter].inst3 = /* 1 */
1097 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1098 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1099 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1100 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1101 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1102 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1103 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1104 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1105 | R500_ALU_RGBA_SEL_C_SRC2
1106 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1107 | R500_ALU_RGBA_MOD_C_NEG
1108 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1109 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1110 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1111 counter++;
1112 /* This inst's selects need to be swapped as follows:
1113 * 0 -> C ; 1 -> B ; 2 -> A */
1114 emit_alu(fp, counter, fpi);
1115 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1116 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1117 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1118 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
1119 | R500_ALU_RGB_SEL_B_SRC0
1120 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
1121 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
1122 | R500_ALPHA_ADDRD(dest)
1123 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
1124 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
1125 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
1126 | R500_ALU_RGBA_ADDRD(dest)
1127 | R500_ALU_RGBA_SEL_C_SRC0
1128 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1129 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1130 | R500_ALU_RGBA_A_SWIZ_A;
1131 break;
1132 case OPCODE_SUB:
1133 /* Variation on MAD: 1*src0-src1 */
1134 fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */
1135 emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
1136 break;
1137 case OPCODE_SWZ:
1138 /* TODO: The rarer negation masks! */
1139 emit_alu(fp, counter, fpi);
1140 emit_mov(fp, counter, fpi->SrcReg[0], dest);
1141 break;
1142 case OPCODE_KIL:
1143 case OPCODE_TEX:
1144 case OPCODE_TXB:
1145 case OPCODE_TXP:
1146 emit_tex(fp, fpi, dest, counter);
1147 if (fpi->DstReg.File == PROGRAM_OUTPUT)
1148 counter++;
1149 break;
1150 default:
1151 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
1152 break;
1153 }
1154
1155 /* Finishing touches */
1156 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
1157 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
1158 }
1159
1160 counter++;
1161
1162 if (fp->error)
1163 return GL_FALSE;
1164
1165 }
1166
1167 /* Finish him! (If it's an ALU/OUT instruction...) */
1168 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
1169 fp->inst[counter-1].inst0 |= R500_INST_LAST;
1170 } else {
1171 /* We still need to put an output inst, right? */
1172 WARN_ONCE("Final FP instruction is not an OUT.\n");
1173 }
1174
1175 fp->cs->nrslots = counter;
1176
1177 fp->max_temp_idx++;
1178
1179 return GL_TRUE;
1180 }
1181
1182 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
1183 {
1184 struct r300_pfs_compile_state *cs = NULL;
1185 struct gl_fragment_program *mp = &fp->mesa_program;
1186 struct prog_instruction *fpi;
1187 GLuint InputsRead = mp->Base.InputsRead;
1188 GLuint temps_used = 0;
1189 int i, j;
1190
1191 /* New compile, reset tracking data */
1192 fp->optimization =
1193 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
1194 fp->translated = GL_FALSE;
1195 fp->error = GL_FALSE;
1196 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
1197 fp->cur_node = 0;
1198 fp->first_node_has_tex = 0;
1199 fp->const_nr = 0;
1200 /* Size of pixel stack, plus 1. */
1201 fp->max_temp_idx = 1;
1202 /* Temp register offset. */
1203 fp->temp_reg_offset = 0;
1204 fp->node[0].alu_end = -1;
1205 fp->node[0].tex_end = -1;
1206
1207 _mesa_memset(cs, 0, sizeof(*fp->cs));
1208 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
1209 for (j = 0; j < 3; j++) {
1210 cs->slot[i].vsrc[j] = SRC_CONST;
1211 cs->slot[i].ssrc[j] = SRC_CONST;
1212 }
1213 }
1214
1215 /* Work out what temps the Mesa inputs correspond to, this must match
1216 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1217 * configures itself based on the fragprog's InputsRead
1218 *
1219 * NOTE: this depends on get_hw_temp() allocating registers in order,
1220 * starting from register 0, so we're just going to do that instead.
1221 */
1222
1223 /* Texcoords come first */
1224 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
1225 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1226 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
1227 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
1228 fp->temp_reg_offset;
1229 fp->temp_reg_offset++;
1230 }
1231 }
1232 InputsRead &= ~FRAG_BITS_TEX_ANY;
1233
1234 /* fragment position treated as a texcoord */
1235 if (InputsRead & FRAG_BIT_WPOS) {
1236 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
1237 cs->inputs[FRAG_ATTRIB_WPOS].reg =
1238 fp->temp_reg_offset;
1239 fp->temp_reg_offset++;
1240 }
1241 InputsRead &= ~FRAG_BIT_WPOS;
1242
1243 /* Then primary colour */
1244 if (InputsRead & FRAG_BIT_COL0) {
1245 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
1246 cs->inputs[FRAG_ATTRIB_COL0].reg =
1247 fp->temp_reg_offset;
1248 fp->temp_reg_offset++;
1249 }
1250 InputsRead &= ~FRAG_BIT_COL0;
1251
1252 /* Secondary color */
1253 if (InputsRead & FRAG_BIT_COL1) {
1254 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
1255 cs->inputs[FRAG_ATTRIB_COL1].reg =
1256 fp->temp_reg_offset;
1257 fp->temp_reg_offset++;
1258 }
1259 InputsRead &= ~FRAG_BIT_COL1;
1260
1261 /* Anything else */
1262 if (InputsRead) {
1263 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
1264 /* force read from hwreg 0 for now */
1265 for (i = 0; i < 32; i++)
1266 if (InputsRead & (1 << i))
1267 cs->inputs[i].reg = 0;
1268 }
1269
1270 if (!mp->Base.Instructions) {
1271 ERROR("No instructions found in program, going to go die now.\n");
1272 return;
1273 }
1274
1275 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
1276 for (i = 0; i < 3; i++) {
1277 if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
1278 if (fpi->SrcReg[i].Index > temps_used)
1279 temps_used = fpi->SrcReg[i].Index;
1280 }
1281 }
1282 }
1283
1284 cs->temp_in_use = temps_used;
1285
1286 fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use + 1;
1287 }
1288
1289 static void update_params(struct r500_fragment_program *fp)
1290 {
1291 struct gl_fragment_program *mp = &fp->mesa_program;
1292
1293 /* Ask Mesa nicely to fill in ParameterValues for us */
1294 if (mp->Base.Parameters)
1295 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1296 }
1297
1298 static void dumb_shader(struct r500_fragment_program *fp)
1299 {
1300 fp->inst[0].inst0 = R500_INST_TYPE_TEX
1301 | R500_INST_TEX_SEM_WAIT
1302 | R500_INST_RGB_WMASK_R
1303 | R500_INST_RGB_WMASK_G
1304 | R500_INST_RGB_WMASK_B
1305 | R500_INST_ALPHA_WMASK
1306 | R500_INST_RGB_CLAMP
1307 | R500_INST_ALPHA_CLAMP;
1308 fp->inst[0].inst1 = R500_TEX_ID(0)
1309 | R500_TEX_INST_LD
1310 | R500_TEX_SEM_ACQUIRE
1311 | R500_TEX_IGNORE_UNCOVERED;
1312 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
1313 | R500_TEX_SRC_S_SWIZ_R
1314 | R500_TEX_SRC_T_SWIZ_G
1315 | R500_TEX_DST_ADDR(0)
1316 | R500_TEX_DST_R_SWIZ_R
1317 | R500_TEX_DST_G_SWIZ_G
1318 | R500_TEX_DST_B_SWIZ_B
1319 | R500_TEX_DST_A_SWIZ_A;
1320 fp->inst[0].inst3 = R500_DX_ADDR(0)
1321 | R500_DX_S_SWIZ_R
1322 | R500_DX_T_SWIZ_R
1323 | R500_DX_R_SWIZ_R
1324 | R500_DX_Q_SWIZ_R
1325 | R500_DY_ADDR(0)
1326 | R500_DY_S_SWIZ_R
1327 | R500_DY_T_SWIZ_R
1328 | R500_DY_R_SWIZ_R
1329 | R500_DY_Q_SWIZ_R;
1330 fp->inst[0].inst4 = 0x0;
1331 fp->inst[0].inst5 = 0x0;
1332
1333 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
1334 R500_INST_TEX_SEM_WAIT |
1335 R500_INST_LAST |
1336 R500_INST_RGB_OMASK_R |
1337 R500_INST_RGB_OMASK_G |
1338 R500_INST_RGB_OMASK_B |
1339 R500_INST_ALPHA_OMASK;
1340 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
1341 R500_RGB_ADDR1(0) |
1342 R500_RGB_ADDR1_CONST |
1343 R500_RGB_ADDR2(0) |
1344 R500_RGB_ADDR2_CONST |
1345 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
1346 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
1347 R500_ALPHA_ADDR1(0) |
1348 R500_ALPHA_ADDR1_CONST |
1349 R500_ALPHA_ADDR2(0) |
1350 R500_ALPHA_ADDR2_CONST |
1351 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
1352 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
1353 R500_ALU_RGB_R_SWIZ_A_R |
1354 R500_ALU_RGB_G_SWIZ_A_G |
1355 R500_ALU_RGB_B_SWIZ_A_B |
1356 R500_ALU_RGB_SEL_B_SRC0 |
1357 R500_ALU_RGB_R_SWIZ_B_1 |
1358 R500_ALU_RGB_B_SWIZ_B_1 |
1359 R500_ALU_RGB_G_SWIZ_B_1;
1360 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
1361 R500_ALPHA_SWIZ_A_A |
1362 R500_ALPHA_SWIZ_B_1;
1363 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
1364 R500_ALU_RGBA_R_SWIZ_0 |
1365 R500_ALU_RGBA_G_SWIZ_0 |
1366 R500_ALU_RGBA_B_SWIZ_0 |
1367 R500_ALU_RGBA_A_SWIZ_0;
1368
1369 fp->cs->nrslots = 2;
1370 fp->translated = GL_TRUE;
1371 }
1372
1373 void r500TranslateFragmentShader(r300ContextPtr r300,
1374 struct r500_fragment_program *fp)
1375 {
1376
1377 struct r300_pfs_compile_state *cs = NULL;
1378
1379 if (!fp->translated) {
1380
1381 init_program(r300, fp);
1382 cs = fp->cs;
1383
1384 if (parse_program(fp) == GL_FALSE) {
1385 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1386 dumb_shader(fp);
1387 fp->inst_offset = 0;
1388 fp->inst_end = cs->nrslots - 1;
1389 return;
1390 }
1391 fp->inst_offset = 0;
1392 fp->inst_end = cs->nrslots - 1;
1393
1394 fp->translated = GL_TRUE;
1395 if (RADEON_DEBUG & DEBUG_PIXEL) {
1396 fprintf(stderr, "Mesa program:\n");
1397 fprintf(stderr, "-------------\n");
1398 _mesa_print_program(&fp->mesa_program.Base);
1399 fflush(stdout);
1400 dump_program(fp);
1401 }
1402
1403
1404 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1405 }
1406
1407 update_params(fp);
1408
1409 }
1410
/**
 * Map a swizzle selector value to its printable name.
 *
 * \param swiz_val selector in the range 0..7
 * \return the name string, or NULL when \p swiz_val is outside 0..7
 */
static char *toswiz(int swiz_val) {
	static char *const swizzle_names[] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};
	const int count = (int)(sizeof(swizzle_names) / sizeof(swizzle_names[0]));

	if (swiz_val < 0 || swiz_val >= count)
		return NULL;

	return swizzle_names[swiz_val];
}
1424
/**
 * Map an RGBA opcode value to its printable mnemonic.
 *
 * \param op_val opcode value; 0..12 have names
 * \return the mnemonic, or "Unknown" for any value without a name. The
 *         result is always a valid string so it can be passed straight to
 *         a "%s" conversion (previously an unlisted value returned an
 *         uninitialised pointer).
 */
static char *toop(int op_val)
{
	char *str = "Unknown";

	switch (op_val) {
	case 0: str = "MAD"; break;
	case 1: str = "DP3"; break;
	case 2: str = "DP4"; break;
	case 3: str = "D2A"; break;
	case 4: str = "MIN"; break;
	case 5: str = "MAX"; break;
	case 6: str = "Reserved"; break;
	case 7: str = "CND"; break;
	case 8: str = "CMP"; break;
	case 9: str = "FRC"; break;
	case 10: str = "SOP"; break;
	case 11: str = "MDH"; break;
	case 12: str = "MDV"; break;
	default: break;
	}
	return str;
}
1445
/**
 * Map an alpha opcode value to its printable mnemonic.
 *
 * \param op_val opcode value in the range 0..15
 * \return the mnemonic, or NULL when \p op_val is outside 0..15
 */
static char *to_alpha_op(int op_val)
{
	static char *const alpha_op_names[16] = {
		"MAD", "DP", "MIN", "MAX",
		"Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ",
		"SIN", "COS", "MDH", "MDV"
	};

	if (op_val >= 0 && op_val < 16)
		return alpha_op_names[op_val];

	return NULL;
}
1469
/**
 * Map a 4-bit channel mask to its printable name.
 *
 * Bit 0 is R, bit 1 is G, bit 2 is B and bit 3 is A; when A is present it
 * is listed first in the name.
 *
 * \param val mask value in the range 0..15
 * \return the name string, or NULL when \p val is outside 0..15
 */
static char *to_mask(int val)
{
	switch (val) {
	case 0:  return "NONE";
	case 1:  return "R";
	case 2:  return "G";
	case 3:  return "RG";
	case 4:  return "B";
	case 5:  return "RB";
	case 6:  return "GB";
	case 7:  return "RGB";
	case 8:  return "A";
	case 9:  return "AR";
	case 10: return "AG";
	case 11: return "ARG";
	case 12: return "AB";
	case 13: return "ARB";
	case 14: return "AGB";
	case 15: return "ARGB";
	default: return NULL;
	}
}
1493
/**
 * Map a texture opcode value to its printable name.
 *
 * \param val opcode value in the range 0..6
 * \return the name string, or NULL when \p val is outside 0..6
 */
static char *to_texop(int val)
{
	static char *const texop_names[] = {
		"NOP", "LD", "TEXKILL", "PROJ", "LODBIAS", "LOD", "DXDY"
	};
	char *name = NULL;

	if (val >= 0 && val < (int)(sizeof(texop_names) / sizeof(texop_names[0])))
		name = texop_names[val];

	return name;
}
1507
1508 static void dump_program(struct r500_fragment_program *fp)
1509 {
1510 int pc = 0;
1511 int n;
1512 uint32_t inst;
1513 uint32_t inst0;
1514 char *str = NULL;
1515
1516 for (n = 0; n < fp->inst_end+1; n++) {
1517 inst0 = inst = fp->inst[n].inst0;
1518 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1519 switch(inst & 0x3) {
1520 case R500_INST_TYPE_ALU: str = "ALU"; break;
1521 case R500_INST_TYPE_OUT: str = "OUT"; break;
1522 case R500_INST_TYPE_FC: str = "FC"; break;
1523 case R500_INST_TYPE_TEX: str = "TEX"; break;
1524 };
1525 fprintf(stderr,"%s %s %s %s %s ", str,
1526 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1527 inst & R500_INST_LAST ? "LAST" : "",
1528 inst & R500_INST_NOP ? "NOP" : "",
1529 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1530 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1531 to_mask((inst >> 15) & 0xf));
1532
1533 switch(inst0 & 0x3) {
1534 case 0:
1535 case 1:
1536 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1537 inst = fp->inst[n].inst1;
1538
1539 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1540 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1541 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1542 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1543 (inst >> 30));
1544
1545 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1546 inst = fp->inst[n].inst2;
1547 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1548 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1549 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1550 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1551 (inst >> 30));
1552 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1553 inst = fp->inst[n].inst3;
1554 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1555 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1556 (inst >> 11) & 0x3,
1557 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1558 (inst >> 24) & 0x3);
1559
1560
1561 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1562 inst = fp->inst[n].inst4;
1563 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
1564 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1565 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1566 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
1567 (inst >> 31) & 0x1);
1568
1569 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1570 inst = fp->inst[n].inst5;
1571 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1572 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1573 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1574 (inst >> 23) & 0x3,
1575 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1576 break;
1577 case 2:
1578 break;
1579 case 3:
1580 inst = fp->inst[n].inst1;
1581 fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
1582 to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
1583 (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
1584 inst = fp->inst[n].inst2;
1585 fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
1586 inst & 127, inst & (1<<7) ? "(rel)" : "",
1587 toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
1588 toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
1589 (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
1590 toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
1591 toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
1592
1593 fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3);
1594 break;
1595 }
1596 fprintf(stderr,"\n");
1597 }
1598
1599 }