r500: print out opcode string
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
/*
 * Useful macros and values
 */

/*
 * Print a compile error to stderr, prefixed with the current file and
 * function name, and mark the program as failed.  The macro appends its
 * own newline.  It expects a variable named `fp` (the fragment program
 * being compiled) to be in scope at the point of use.
 */
#define ERROR(fmt, args...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##args); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares a local `cs` pointing at the compile state of `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* Register file sizes; valid register indices are 0 .. (size - 1). */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
/* OR'ed into a register index to mark it as a constant register. */
#define REG_CONSTANT (1 << 8)
/* NOTE(review): presumably relative-addressing flags; not used in the
 * code visible here -- confirm against the hardware docs. */
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
/* Per-channel selects for the constants 0, 0.5 and 1. */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three 3-bit channel selects packed as R | G << 3 | B << 6. */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/*
 * Field placement helpers.  The argument is parenthesised so that an
 * expression such as `a | b` is shifted as a whole rather than only its
 * right-most operand.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
95
96 static void dump_program(struct r500_fragment_program *fp);
97
98 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
99 GLuint swiz = 0x0;
100 GLuint temp;
101 /* This could be optimized, but it should be plenty fast already. */
102 int i;
103 for (i = 0; i < 3; i++) {
104 temp = GET_SWZ(src.Swizzle, i);
105 /* Fix SWIZZLE_ONE */
106 if (temp == 5) temp++;
107 swiz += temp << i*3;
108 }
109 return swiz;
110 }
111
112 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
113 GLuint swiz = GET_SWZ(src.Swizzle, 3);
114
115 if (swiz == 5) swiz++;
116 return swiz;
117 }
118
119 static inline GLuint make_sop_swizzle(struct prog_src_register src) {
120 GLuint swiz = GET_SWZ(src.Swizzle, 0);
121
122 if (swiz == 5) swiz++;
123 return swiz;
124 }
125
126 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
127 GLuint swiz = 0x0;
128 GLuint temp = src.Swizzle;
129 int i;
130 for (i = 0; i < 4; i++) {
131 swiz += (temp & 0x3) << i*2;
132 temp >>= 3;
133 }
134 return swiz;
135 }
136
137 static int get_temp(struct r500_fragment_program *fp, int slot) {
138
139 COMPILE_STATE;
140
141 int r = cs->temp_in_use + 1 + slot;
142
143 if (r > R500_US_NUM_TEMP_REGS) {
144 ERROR("Too many temporary registers requested, can't compile!\n");
145 }
146
147 return r;
148 }
149
150 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
151 static GLuint emit_const4fv(struct r500_fragment_program *fp,
152 const GLfloat * cp)
153 {
154 GLuint reg = 0x0;
155 int index;
156
157 for (index = 0; index < fp->const_nr; ++index) {
158 if (fp->constant[index] == cp)
159 break;
160 }
161
162 if (index >= fp->const_nr) {
163 if (index >= R500_US_NUM_CONST_REGS) {
164 ERROR("Out of hw constants!\n");
165 return reg;
166 }
167
168 fp->const_nr++;
169 fp->constant[index] = cp;
170 }
171
172 reg = index | REG_CONSTANT;
173 return reg;
174 }
175
176 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
177 COMPILE_STATE;
178 GLuint reg;
179 switch (src.File) {
180 case PROGRAM_TEMPORARY:
181 reg = src.Index + fp->temp_reg_offset;
182 break;
183 case PROGRAM_INPUT:
184 reg = cs->inputs[src.Index].reg;
185 break;
186 case PROGRAM_LOCAL_PARAM:
187 reg = emit_const4fv(fp,
188 fp->mesa_program.Base.LocalParams[src.
189 Index]);
190 break;
191 case PROGRAM_ENV_PARAM:
192 reg = emit_const4fv(fp,
193 fp->ctx->FragmentProgram.Parameters[src.
194 Index]);
195 break;
196 case PROGRAM_STATE_VAR:
197 case PROGRAM_NAMED_PARAM:
198 case PROGRAM_CONSTANT:
199 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
200 ParameterValues[src.Index]);
201 break;
202 default:
203 ERROR("Can't handle src.File %x\n", src.File);
204 reg = 0x0;
205 break;
206 }
207 return reg;
208 }
209
210 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
211 GLuint reg;
212 switch (dest.File) {
213 case PROGRAM_TEMPORARY:
214 reg = dest.Index + fp->temp_reg_offset;
215 break;
216 case PROGRAM_OUTPUT:
217 /* Eventually we may need to handle multiple
218 * rendering targets... */
219 reg = dest.Index;
220 break;
221 default:
222 ERROR("Can't handle dest.File %x\n", dest.File);
223 reg = 0x0;
224 break;
225 }
226 return reg;
227 }
228
229 static void emit_tex(struct r500_fragment_program *fp,
230 struct prog_instruction *fpi, int opcode, int dest, int counter)
231 {
232 int hwsrc, hwdest;
233 GLuint mask;
234
235 mask = fpi->DstReg.WriteMask << 11;
236 hwsrc = make_src(fp, fpi->SrcReg[0]);
237
238 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
239 hwdest = get_temp(fp, 0);
240 } else {
241 hwdest = dest;
242 }
243
244 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
245 | R500_INST_TEX_SEM_WAIT;
246
247 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
248 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
249
250 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
251 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
252
253 switch (opcode) {
254 case OPCODE_KIL:
255 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
256 break;
257 case OPCODE_TEX:
258 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
259 break;
260 case OPCODE_TXB:
261 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
262 break;
263 case OPCODE_TXP:
264 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
265 break;
266 default:
267 ERROR("emit_tex can't handle opcode %x\n", opcode);
268 }
269
270 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
271 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
272 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
273 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
274 | R500_TEX_DST_ADDR(hwdest)
275 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
276 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
277
278 fp->inst[counter].inst3 = 0x0;
279 fp->inst[counter].inst4 = 0x0;
280 fp->inst[counter].inst5 = 0x0;
281
282 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
283 counter++;
284 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
285 | R500_INST_TEX_SEM_WAIT | (mask << 4);
286 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
287 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
288 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
289 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
290 | R500_ALU_RGB_SEL_B_SRC0
291 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
292 | R500_ALU_RGB_OMOD_DISABLE;
293 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
294 | R500_ALPHA_ADDRD(dest)
295 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
296 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
297 | R500_ALPHA_OMOD_DISABLE;
298 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
299 | R500_ALU_RGBA_ADDRD(dest)
300 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
301 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
302 }
303 }
304
305 static void dumb_shader(struct r500_fragment_program *fp)
306 {
307 fp->inst[0].inst0 = R500_INST_TYPE_TEX
308 | R500_INST_TEX_SEM_WAIT
309 | R500_INST_RGB_WMASK_R
310 | R500_INST_RGB_WMASK_G
311 | R500_INST_RGB_WMASK_B
312 | R500_INST_ALPHA_WMASK
313 | R500_INST_RGB_CLAMP
314 | R500_INST_ALPHA_CLAMP;
315 fp->inst[0].inst1 = R500_TEX_ID(0)
316 | R500_TEX_INST_LD
317 | R500_TEX_SEM_ACQUIRE
318 | R500_TEX_IGNORE_UNCOVERED;
319 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
320 | R500_TEX_SRC_S_SWIZ_R
321 | R500_TEX_SRC_T_SWIZ_G
322 | R500_TEX_DST_ADDR(0)
323 | R500_TEX_DST_R_SWIZ_R
324 | R500_TEX_DST_G_SWIZ_G
325 | R500_TEX_DST_B_SWIZ_B
326 | R500_TEX_DST_A_SWIZ_A;
327 fp->inst[0].inst3 = R500_DX_ADDR(0)
328 | R500_DX_S_SWIZ_R
329 | R500_DX_T_SWIZ_R
330 | R500_DX_R_SWIZ_R
331 | R500_DX_Q_SWIZ_R
332 | R500_DY_ADDR(0)
333 | R500_DY_S_SWIZ_R
334 | R500_DY_T_SWIZ_R
335 | R500_DY_R_SWIZ_R
336 | R500_DY_Q_SWIZ_R;
337 fp->inst[0].inst4 = 0x0;
338 fp->inst[0].inst5 = 0x0;
339
340 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
341 R500_INST_TEX_SEM_WAIT |
342 R500_INST_LAST |
343 R500_INST_RGB_OMASK_R |
344 R500_INST_RGB_OMASK_G |
345 R500_INST_RGB_OMASK_B |
346 R500_INST_ALPHA_OMASK;
347 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
348 R500_RGB_ADDR1(0) |
349 R500_RGB_ADDR1_CONST |
350 R500_RGB_ADDR2(0) |
351 R500_RGB_ADDR2_CONST |
352 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
353 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
354 R500_ALPHA_ADDR1(0) |
355 R500_ALPHA_ADDR1_CONST |
356 R500_ALPHA_ADDR2(0) |
357 R500_ALPHA_ADDR2_CONST |
358 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
359 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
360 R500_ALU_RGB_R_SWIZ_A_R |
361 R500_ALU_RGB_G_SWIZ_A_G |
362 R500_ALU_RGB_B_SWIZ_A_B |
363 R500_ALU_RGB_SEL_B_SRC0 |
364 R500_ALU_RGB_R_SWIZ_B_1 |
365 R500_ALU_RGB_B_SWIZ_B_1 |
366 R500_ALU_RGB_G_SWIZ_B_1;
367 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
368 R500_ALPHA_SWIZ_A_A |
369 R500_ALPHA_SWIZ_B_1;
370 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
371 R500_ALU_RGBA_R_SWIZ_0 |
372 R500_ALU_RGBA_G_SWIZ_0 |
373 R500_ALU_RGBA_B_SWIZ_0 |
374 R500_ALU_RGBA_A_SWIZ_0;
375
376 fp->cs->nrslots = 2;
377 fp->translated = GL_TRUE;
378 }
379
380 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
381 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
382 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
383 /* output_mask */
384 | (fpi->DstReg.WriteMask << 15);
385 } else {
386 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
387 /* pixel_mask */
388 | (fpi->DstReg.WriteMask << 11);
389 }
390
391 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
392 }
393
394 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
395 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
396 * it is technically more accurate and recommended by ATI/AMD. */
397 GLuint src_reg = make_src(fp, src);
398 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
399 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
400 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
401 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
402 | R500_ALU_RGB_SEL_B_SRC0
403 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
404 | R500_ALU_RGB_OMOD_DISABLE;
405 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
406 | R500_ALPHA_ADDRD(dest)
407 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
408 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
409 | R500_ALPHA_OMOD_DISABLE;
410 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
411 | R500_ALU_RGBA_ADDRD(dest)
412 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
413 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
414 }
415
416 static GLboolean parse_program(struct r500_fragment_program *fp)
417 {
418 struct gl_fragment_program *mp = &fp->mesa_program;
419 const struct prog_instruction *inst = mp->Base.Instructions;
420 struct prog_instruction *fpi;
421 GLuint src[3], dest, temp[2];
422 int flags, pixel_mask = 0, output_mask = 0, counter = 0;
423
424 if (!inst || inst[0].Opcode == OPCODE_END) {
425 ERROR("The program is empty!\n");
426 return GL_FALSE;
427 }
428
429 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
430
431 if (fpi->Opcode != OPCODE_KIL) {
432 dest = make_dest(fp, fpi->DstReg);
433
434 pixel_mask = fpi->DstReg.WriteMask << 11;
435 output_mask = fpi->DstReg.WriteMask << 15;
436 }
437
438 switch (fpi->Opcode) {
439 case OPCODE_ABS:
440 emit_alu(fp, counter, fpi);
441 emit_mov(fp, counter, fpi->SrcReg[0], dest);
442 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
443 | R500_ALU_RGB_MOD_B_ABS;
444 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
445 | R500_ALPHA_MOD_B_ABS;
446 break;
447 case OPCODE_ADD:
448 src[0] = make_src(fp, fpi->SrcReg[0]);
449 src[1] = make_src(fp, fpi->SrcReg[1]);
450 /* Variation on MAD: 1*src0+src1 */
451 emit_alu(fp, counter, fpi);
452 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
453 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
454 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
455 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
456 fp->inst[counter].inst3 = /* 1 */
457 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
458 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
459 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
460 | R500_ALPHA_ADDRD(dest)
461 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
462 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
463 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
464 | R500_ALU_RGBA_ADDRD(dest)
465 | R500_ALU_RGBA_SEL_C_SRC1
466 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
467 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
468 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
469 break;
470 case OPCODE_CMP:
471 /* This inst's selects need to be swapped as follows:
472 * 0 -> C ; 1 -> B ; 2 -> A */
473 src[0] = make_src(fp, fpi->SrcReg[0]);
474 src[1] = make_src(fp, fpi->SrcReg[1]);
475 src[2] = make_src(fp, fpi->SrcReg[2]);
476 emit_alu(fp, counter, fpi);
477 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
478 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
479 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
480 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
481 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
482 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
483 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
484 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
485 | R500_ALPHA_ADDRD(dest)
486 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
487 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
488 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
489 | R500_ALU_RGBA_ADDRD(dest)
490 | R500_ALU_RGBA_SEL_C_SRC2
491 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
492 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
493 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
494 break;
495 case OPCODE_COS:
496 src[0] = make_src(fp, fpi->SrcReg[0]);
497 emit_alu(fp, counter, fpi);
498 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
499 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
500 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
501 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
502 | R500_ALPHA_ADDRD(dest)
503 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
504 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
505 | R500_ALU_RGBA_ADDRD(dest);
506 break;
507 case OPCODE_DP3:
508 src[0] = make_src(fp, fpi->SrcReg[0]);
509 src[1] = make_src(fp, fpi->SrcReg[1]);
510 emit_alu(fp, counter, fpi);
511 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
512 | R500_RGB_ADDR1(src[1]);
513 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
514 | R500_ALPHA_ADDR1(src[1]);
515 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
516 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
517 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
518 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
519 | R500_ALPHA_ADDRD(dest)
520 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
521 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
522 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
523 | R500_ALU_RGBA_ADDRD(dest);
524 break;
525 case OPCODE_DP4:
526 src[0] = make_src(fp, fpi->SrcReg[0]);
527 src[1] = make_src(fp, fpi->SrcReg[1]);
528 /* Based on DP3 */
529 emit_alu(fp, counter, fpi);
530 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
531 | R500_RGB_ADDR1(src[1]);
532 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
533 | R500_ALPHA_ADDR1(src[1]);
534 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
535 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
536 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
537 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
538 | R500_ALPHA_ADDRD(dest)
539 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
540 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
541 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
542 | R500_ALU_RGBA_ADDRD(dest);
543 break;
544 case OPCODE_DPH:
545 src[0] = make_src(fp, fpi->SrcReg[0]);
546 src[1] = make_src(fp, fpi->SrcReg[1]);
547 /* Based on DP3 */
548 emit_alu(fp, counter, fpi);
549 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
550 | R500_RGB_ADDR1(src[1]);
551 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
552 | R500_ALPHA_ADDR1(src[1]);
553 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
554 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
555 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
556 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
557 | R500_ALPHA_ADDRD(dest)
558 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
559 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
560 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
561 | R500_ALU_RGBA_ADDRD(dest);
562 break;
563 case OPCODE_EX2:
564 src[0] = make_src(fp, fpi->SrcReg[0]);
565 emit_alu(fp, counter, fpi);
566 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
567 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
568 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
569 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
570 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
571 | R500_ALPHA_ADDRD(dest)
572 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
573 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
574 | R500_ALU_RGBA_ADDRD(dest);
575 break;
576 case OPCODE_FRC:
577 src[0] = make_src(fp, fpi->SrcReg[0]);
578 emit_alu(fp, counter, fpi);
579 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
580 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
581 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
582 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
583 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
584 | R500_ALPHA_ADDRD(dest)
585 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
586 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
587 | R500_ALU_RGBA_ADDRD(dest);
588 break;
589 case OPCODE_KIL:
590 emit_tex(fp, fpi, OPCODE_KIL, dest, counter);
591 break;
592 case OPCODE_LG2:
593 src[0] = make_src(fp, fpi->SrcReg[0]);
594 emit_alu(fp, counter, fpi);
595 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
596 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
597 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
598 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
599 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
600 | R500_ALPHA_ADDRD(dest)
601 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
602 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
603 | R500_ALU_RGBA_ADDRD(dest);
604 break;
605 case OPCODE_MAD:
606 src[0] = make_src(fp, fpi->SrcReg[0]);
607 src[1] = make_src(fp, fpi->SrcReg[1]);
608 src[2] = make_src(fp, fpi->SrcReg[2]);
609 emit_alu(fp, counter, fpi);
610 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
611 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
612 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
613 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
614 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
615 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
616 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
617 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
618 | R500_ALPHA_ADDRD(dest)
619 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
620 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
621 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
622 | R500_ALU_RGBA_ADDRD(dest)
623 | R500_ALU_RGBA_SEL_C_SRC2
624 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
625 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
626 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
627 break;
628 case OPCODE_MAX:
629 src[0] = make_src(fp, fpi->SrcReg[0]);
630 src[1] = make_src(fp, fpi->SrcReg[1]);
631 emit_alu(fp, counter, fpi);
632 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
633 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
634 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
635 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
636 | R500_ALU_RGB_SEL_B_SRC1
637 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
638 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
639 | R500_ALPHA_ADDRD(dest)
640 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
641 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
642 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
643 | R500_ALU_RGBA_ADDRD(dest);
644 break;
645 case OPCODE_MIN:
646 src[0] = make_src(fp, fpi->SrcReg[0]);
647 src[1] = make_src(fp, fpi->SrcReg[1]);
648 emit_alu(fp, counter, fpi);
649 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
650 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
651 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
652 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
653 | R500_ALU_RGB_SEL_B_SRC1
654 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
655 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
656 | R500_ALPHA_ADDRD(dest)
657 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
658 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
659 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
660 | R500_ALU_RGBA_ADDRD(dest);
661 break;
662 case OPCODE_MOV:
663 emit_alu(fp, counter, fpi);
664 emit_mov(fp, counter, fpi->SrcReg[0], dest);
665 break;
666 case OPCODE_MUL:
667 src[0] = make_src(fp, fpi->SrcReg[0]);
668 src[1] = make_src(fp, fpi->SrcReg[1]);
669 /* Variation on MAD: src0*src1+0 */
670 emit_alu(fp, counter, fpi);
671 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
672 | R500_RGB_ADDR1(src[1]);
673 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
674 | R500_ALPHA_ADDR1(src[1]);
675 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
676 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
677 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
678 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
679 | R500_ALPHA_ADDRD(dest)
680 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
681 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
682 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
683 | R500_ALU_RGBA_ADDRD(dest)
684 // | R500_ALU_RGBA_SEL_C_SRC2
685 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
686 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
687 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
688 break;
689 case OPCODE_RCP:
690 src[0] = make_src(fp, fpi->SrcReg[0]);
691 emit_alu(fp, counter, fpi);
692 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
693 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
694 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
695 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
696 fp->inst[counter].inst4 = R500_ALPHA_OP_RCP
697 | R500_ALPHA_ADDRD(dest)
698 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
699 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
700 | R500_ALU_RGBA_ADDRD(dest);
701 break;
702 case OPCODE_RSQ:
703 src[0] = make_src(fp, fpi->SrcReg[0]);
704 emit_alu(fp, counter, fpi);
705 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
706 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
707 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
708 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
709 fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ
710 | R500_ALPHA_ADDRD(dest)
711 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
712 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
713 | R500_ALU_RGBA_ADDRD(dest);
714 break;
715 case OPCODE_SCS:
716 /* TODO: Make this elegant! */
717 /* Do a cosine, then a sine, masking out the channels we want to protect. */
718 src[0] = make_src(fp, fpi->SrcReg[0]);
719 /* Cosine only goes in R (x) channel. */
720 fpi->DstReg.WriteMask = 0x1;
721 emit_alu(fp, counter, fpi);
722 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
723 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
724 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
725 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
726 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
727 | R500_ALPHA_ADDRD(dest)
728 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
729 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
730 | R500_ALU_RGBA_ADDRD(dest);
731 counter++;
732 /* Sine only goes in G (y) channel. */
733 fpi->DstReg.WriteMask = 0x2;
734 emit_alu(fp, counter, fpi);
735 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
736 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
737 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
738 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
739 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
740 | R500_ALPHA_ADDRD(dest)
741 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
742 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
743 | R500_ALU_RGBA_ADDRD(dest);
744 /* Put 0 into B,A (z,w) channels.
745 counter++;
746 fpi->DstReg.WriteMask = 0xC;
747 emit_alu(fp, counter, fpi);
748 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
749 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
750 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
751 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
752 | R500_ALU_RGB_SEL_B_SRC0
753 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
754 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
755 | R500_ALPHA_ADDRD(dest)
756 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
757 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
758 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
759 | R500_ALU_RGBA_ADDRD(dest)
760 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
761 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); */
762 break;
763 case OPCODE_SGE:
764 /* We use SRCP, so as a precaution we're
765 * going to set NOP in previous inst, if possible. */
766 /* This inst's selects need to be swapped as follows:
767 * 0 -> C ; 1 -> B ; 2 -> A */
768 src[0] = make_src(fp, fpi->SrcReg[0]);
769 src[1] = make_src(fp, fpi->SrcReg[1]);
770 emit_alu(fp, counter, fpi);
771 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
772 | R500_RGB_ADDR1(src[1])
773 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
774 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
775 | R500_ALPHA_ADDR1(src[1])
776 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
777 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
778 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
779 | R500_ALU_RGB_SEL_B_SRC1
780 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
781 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
782 | R500_ALPHA_ADDRD(dest)
783 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
784 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
785 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
786 | R500_ALU_RGBA_ADDRD(dest)
787 | R500_ALU_RGBA_SEL_C_SRCP
788 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
789 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
790 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
791 break;
792 case OPCODE_SIN:
793 src[0] = make_src(fp, fpi->SrcReg[0]);
794 emit_alu(fp, counter, fpi);
795 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
796 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
797 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
798 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
799 | R500_ALPHA_ADDRD(dest)
800 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
801 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
802 | R500_ALU_RGBA_ADDRD(dest);
803 break;
804 case OPCODE_SLT:
805 /* We use SRCP, so as a precaution we're
806 * going to set NOP in previous inst, if possible. */
807 /* This inst's selects need to be swapped as follows:
808 * 0 -> C ; 1 -> B ; 2 -> A */
809 src[0] = make_src(fp, fpi->SrcReg[0]);
810 src[1] = make_src(fp, fpi->SrcReg[1]);
811 emit_alu(fp, counter, fpi);
812 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
813 | R500_RGB_ADDR1(src[1])
814 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
815 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
816 | R500_ALPHA_ADDR1(src[1])
817 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
818 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
819 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
820 | R500_ALU_RGB_SEL_B_SRC1
821 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
822 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
823 | R500_ALPHA_ADDRD(dest)
824 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
825 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
826 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
827 | R500_ALU_RGBA_ADDRD(dest)
828 | R500_ALU_RGBA_SEL_C_SRCP
829 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
830 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
831 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
832 break;
833 case OPCODE_SUB:
834 src[0] = make_src(fp, fpi->SrcReg[0]);
835 src[1] = make_src(fp, fpi->SrcReg[1]);
836 /* Variation on MAD: 1*src0-src1 */
837 emit_alu(fp, counter, fpi);
838 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
839 | R500_RGB_ADDR2(src[1]);
840 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
841 | R500_ALPHA_ADDR2(src[1]);
842 fp->inst[counter].inst3 = /* 1 */
843 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
844 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
845 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
846 | R500_ALPHA_ADDRD(dest)
847 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
848 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
849 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
850 | R500_ALU_RGBA_ADDRD(dest)
851 | R500_ALU_RGBA_SEL_C_SRC2
852 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
853 | R500_ALU_RGBA_MOD_C_NEG
854 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
855 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
856 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
857 break;
858 case OPCODE_SWZ:
859 /* TODO: Negation masks! */
860 emit_alu(fp, counter, fpi);
861 emit_mov(fp, counter, fpi->SrcReg[0], dest);
862 break;
863 case OPCODE_TEX:
864 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
865 if (fpi->DstReg.File == PROGRAM_OUTPUT)
866 counter++;
867 break;
868 case OPCODE_TXB:
869 emit_tex(fp, fpi, OPCODE_TXB, dest, counter);
870 if (fpi->DstReg.File == PROGRAM_OUTPUT)
871 counter++;
872 break;
873 case OPCODE_TXP:
874 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
875 if (fpi->DstReg.File == PROGRAM_OUTPUT)
876 counter++;
877 break;
878 default:
879 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
880 break;
881 }
882
883 /* Finishing touches */
884 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
885 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
886 }
887
888 counter++;
889
890 if (fp->error)
891 return GL_FALSE;
892
893 }
894
895 /* Finish him! (If it's an ALU/OUT instruction...) */
896 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
897 fp->inst[counter-1].inst0 |= R500_INST_LAST;
898 } else {
899 /* We still need to put an output inst, right? */
900 WARN_ONCE("Final FP instruction is not an OUT.\n");
901 #if 0
902
903 #endif
904 }
905
906 fp->cs->nrslots = counter;
907
908 fp->max_temp_idx++;
909
910 return GL_TRUE;
911 }
912
913 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
914 {
915 struct r300_pfs_compile_state *cs = NULL;
916 struct gl_fragment_program *mp = &fp->mesa_program;
917 struct prog_instruction *fpi;
918 GLuint InputsRead = mp->Base.InputsRead;
919 GLuint temps_used = 0;
920 int i, j;
921
922 /* New compile, reset tracking data */
923 fp->optimization =
924 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
925 fp->translated = GL_FALSE;
926 fp->error = GL_FALSE;
927 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
928 fp->cur_node = 0;
929 fp->first_node_has_tex = 0;
930 fp->const_nr = 0;
931 /* Size of pixel stack, plus 1. */
932 fp->max_temp_idx = 1;
933 /* Temp register offset. */
934 fp->temp_reg_offset = 0;
935 fp->node[0].alu_end = -1;
936 fp->node[0].tex_end = -1;
937
938 _mesa_memset(cs, 0, sizeof(*fp->cs));
939 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
940 for (j = 0; j < 3; j++) {
941 cs->slot[i].vsrc[j] = SRC_CONST;
942 cs->slot[i].ssrc[j] = SRC_CONST;
943 }
944 }
945
946 /* Work out what temps the Mesa inputs correspond to, this must match
947 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
948 * configures itself based on the fragprog's InputsRead
949 *
950 * NOTE: this depends on get_hw_temp() allocating registers in order,
951 * starting from register 0, so we're just going to do that instead.
952 */
953
954 /* Texcoords come first */
955 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
956 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
957 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
958 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
959 fp->temp_reg_offset;
960 fp->temp_reg_offset++;
961 }
962 }
963 InputsRead &= ~FRAG_BITS_TEX_ANY;
964
965 /* fragment position treated as a texcoord */
966 if (InputsRead & FRAG_BIT_WPOS) {
967 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
968 cs->inputs[FRAG_ATTRIB_WPOS].reg =
969 fp->temp_reg_offset;
970 fp->temp_reg_offset++;
971 }
972 InputsRead &= ~FRAG_BIT_WPOS;
973
974 /* Then primary colour */
975 if (InputsRead & FRAG_BIT_COL0) {
976 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
977 cs->inputs[FRAG_ATTRIB_COL0].reg =
978 fp->temp_reg_offset;
979 fp->temp_reg_offset++;
980 }
981 InputsRead &= ~FRAG_BIT_COL0;
982
983 /* Secondary color */
984 if (InputsRead & FRAG_BIT_COL1) {
985 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
986 cs->inputs[FRAG_ATTRIB_COL1].reg =
987 fp->temp_reg_offset;
988 fp->temp_reg_offset++;
989 }
990 InputsRead &= ~FRAG_BIT_COL1;
991
992 /* Anything else */
993 if (InputsRead) {
994 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
995 /* force read from hwreg 0 for now */
996 for (i = 0; i < 32; i++)
997 if (InputsRead & (1 << i))
998 cs->inputs[i].reg = 0;
999 }
1000
1001 if (!mp->Base.Instructions) {
1002 ERROR("No instructions found in program, going to go die now.\n");
1003 return;
1004 }
1005
1006 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
1007 for (i = 0; i < 3; i++) {
1008 if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
1009 if (fpi->SrcReg[i].Index > temps_used)
1010 temps_used = fpi->SrcReg[i].Index;
1011 }
1012 }
1013 }
1014
1015 cs->temp_in_use = temps_used;
1016
1017 fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use + 1;
1018 }
1019
1020 static void update_params(struct r500_fragment_program *fp)
1021 {
1022 struct gl_fragment_program *mp = &fp->mesa_program;
1023
1024 /* Ask Mesa nicely to fill in ParameterValues for us */
1025 if (mp->Base.Parameters)
1026 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1027 }
1028
1029 void r500TranslateFragmentShader(r300ContextPtr r300,
1030 struct r500_fragment_program *fp)
1031 {
1032
1033 struct r300_pfs_compile_state *cs = NULL;
1034
1035 if (!fp->translated) {
1036
1037
1038
1039 init_program(r300, fp);
1040 cs = fp->cs;
1041
1042 if (parse_program(fp) == GL_FALSE) {
1043 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1044 dumb_shader(fp);
1045 fp->inst_offset = 0;
1046 fp->inst_end = cs->nrslots - 1;
1047 return;
1048 }
1049 fp->inst_offset = 0;
1050 fp->inst_end = cs->nrslots - 1;
1051
1052 fp->translated = GL_TRUE;
1053 if (RADEON_DEBUG & DEBUG_PIXEL) {
1054 dump_program(fp);
1055 fprintf(stderr, "Mesa program:\n");
1056 fprintf(stderr, "-------------\n");
1057 _mesa_print_program(&fp->mesa_program.Base);
1058 fflush(stdout);
1059 }
1060
1061
1062 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1063 }
1064
1065 update_params(fp);
1066
1067 }
1068
/**
 * Map a swizzle selector code to the short name used in program dumps.
 *
 * \param swiz_val selector code; 0..7 are recognised.
 * \return string literal for the selector, or NULL when \p swiz_val is
 *         outside 0..7.
 */
static char *toswiz(int swiz_val)
{
	static char *const names[] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U",
	};
	const int count = (int)(sizeof(names) / sizeof(names[0]));

	if (swiz_val < 0 || swiz_val >= count)
		return NULL;

	return names[swiz_val];
}
1082
/**
 * Map an RGB/RGBA ALU opcode number to its mnemonic for program dumps.
 *
 * \param op_val opcode number; 0..12 are recognised.
 * \return string literal with the mnemonic, or NULL for any other value.
 *         Callers that print the result must cope with NULL.
 */
static char *toop(int op_val)
{
	switch (op_val) {
	case 0: return "MAD";
	case 1: return "DP3";
	case 2: return "DP4";
	case 3: return "D2A";
	case 4: return "MIN";
	case 5: return "MAX";
	case 6: return "Reserved";
	case 7: return "CND";
	case 8: return "CMP";
	case 9: return "FRC";
	case 10: return "SOP";
	case 11: return "MDH";
	case 12: return "MDV";
	default:
		/* Previously an uninitialised pointer was returned here. */
		return NULL;
	}
}
1103
/**
 * Map an alpha ALU opcode number to its mnemonic for program dumps.
 *
 * \param op_val opcode number; 0..15 are recognised.
 * \return string literal with the mnemonic, or NULL when \p op_val is
 *         outside 0..15.
 */
static char *to_alpha_op(int op_val)
{
	static char *const mnemonics[] = {
		"MAD", "DP", "MIN", "MAX",
		"Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ",
		"SIN", "COS", "MDH", "MDV",
	};
	const int count = (int)(sizeof(mnemonics) / sizeof(mnemonics[0]));

	if (op_val < 0 || op_val >= count)
		return NULL;

	return mnemonics[op_val];
}
1127
/**
 * Render a 4-bit channel mask as text for program dumps.
 *
 * In the resulting names bit 0 stands for R, bit 1 for G, bit 2 for B
 * and bit 3 for A; when A is present it is listed first.
 *
 * \param val mask value; 0..15 are recognised.
 * \return string literal naming the enabled channels ("NONE" for 0), or
 *         NULL when \p val is outside 0..15.
 */
static char *to_mask(int val)
{
	static char *const masks[] = {
		"NONE", "R", "G", "RG",
		"B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG",
		"AB", "ARB", "AGB", "ARGB",
	};
	const int count = (int)(sizeof(masks) / sizeof(masks[0]));

	if (val < 0 || val >= count)
		return NULL;

	return masks[val];
}
1151
/**
 * Map a texture instruction opcode number to its name for program dumps.
 *
 * \param val opcode number; 0..6 are recognised.
 * \return string literal with the opcode name, or NULL for any other
 *         value (including 7).
 */
static char *to_texop(int val)
{
	static char *const texops[] = {
		"NOP", "LD", "TEXKILL", "PROJ", "LODBIAS", "LOD", "DXDY",
	};
	const int count = (int)(sizeof(texops) / sizeof(texops[0]));

	if (val < 0 || val >= count)
		return NULL;

	return texops[val];
}
1165
1166 static void dump_program(struct r500_fragment_program *fp)
1167 {
1168 int pc = 0;
1169 int n;
1170 uint32_t inst;
1171 uint32_t inst0;
1172 char *str = NULL;
1173
1174 for (n = 0; n < fp->inst_end+1; n++) {
1175 inst0 = inst = fp->inst[n].inst0;
1176 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1177 switch(inst & 0x3) {
1178 case R500_INST_TYPE_ALU: str = "ALU"; break;
1179 case R500_INST_TYPE_OUT: str = "OUT"; break;
1180 case R500_INST_TYPE_FC: str = "FC"; break;
1181 case R500_INST_TYPE_TEX: str = "TEX"; break;
1182 };
1183 fprintf(stderr,"%s %s %s %s %s ", str,
1184 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1185 inst & R500_INST_LAST ? "LAST" : "",
1186 inst & R500_INST_NOP ? "NOP" : "",
1187 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1188 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1189 to_mask((inst >> 15) & 0xf));
1190
1191 switch(inst0 & 0x3) {
1192 case 0:
1193 case 1:
1194 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1195 inst = fp->inst[n].inst1;
1196
1197 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1198 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1199 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1200 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1201 (inst >> 30));
1202
1203 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1204 inst = fp->inst[n].inst2;
1205 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1206 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1207 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1208 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1209 (inst >> 30));
1210 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1211 inst = fp->inst[n].inst3;
1212 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1213 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1214 (inst >> 11) & 0x3,
1215 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1216 (inst >> 24) & 0x3);
1217
1218
1219 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1220 inst = fp->inst[n].inst4;
1221 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d\n", to_alpha_op(inst & 0xf),
1222 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1223 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1224 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3);
1225
1226 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1227 inst = fp->inst[n].inst5;
1228 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1229 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1230 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1231 (inst >> 23) & 0x3,
1232 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1233 break;
1234 case 2:
1235 break;
1236 case 3:
1237 inst = fp->inst[n].inst1;
1238 fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
1239 to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
1240 (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
1241 inst = fp->inst[n].inst2;
1242 fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
1243 inst & 127, inst & (1<<7) ? "(rel)" : "",
1244 toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
1245 toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
1246 (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
1247 toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
1248 toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
1249
1250 fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3);
1251 break;
1252 }
1253 fprintf(stderr,"\n");
1254 }
1255
1256 }