r5xx: Fixup emit_tex, add debugging info, enable temp temps.
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
/*
 * Useful macros and values
 */

/*
 * Print a formatted message to stderr, prefixed with the file and function
 * name, and flag the fragment program as failed.  Expects a variable named
 * `fp` (pointer to the program being compiled) to be in scope at the point
 * of use.  A newline is appended by the macro itself.
 */
#define ERROR(fmt, args...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##args); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares a local `cs` pointing at the compile state of the in-scope `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three 3-bit selectors packed R | G << 3 | B << 6. */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/*
 * The MAKE_SWIZ_* helpers move a raw swizzle value to its position inside
 * an instruction word.  The argument is parenthesized so that compound
 * expressions such as `a | b` are shifted as a whole.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)

/* Forward declaration; the definition is not in this part of the file. */
static void dump_program(struct r500_fragment_program *fp);
97
98 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
99 GLuint swiz = 0x0;
100 GLuint temp;
101 /* This could be optimized, but it should be plenty fast already. */
102 int i;
103 for (i = 0; i < 3; i++) {
104 temp = GET_SWZ(src.Swizzle, i);
105 /* Fix SWIZZLE_ONE */
106 if (temp == 5) temp++;
107 swiz += temp << i*3;
108 }
109 return swiz;
110 }
111
112 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
113 GLuint swiz = GET_SWZ(src.Swizzle, 3);
114
115 if (swiz == 5) swiz++;
116 return swiz;
117 }
118
119 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
120 GLuint swiz = 0x0;
121 GLuint temp = src.Swizzle;
122 int i;
123 for (i = 0; i < 4; i++) {
124 swiz += (temp & 0x3) << i*2;
125 temp >>= 3;
126 }
127 return swiz;
128 }
129
130 static int get_temp(struct r500_fragment_program *fp) {
131 return fp->max_temp_idx + 1;
132 }
133
134 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
135 static GLuint emit_const4fv(struct r500_fragment_program *fp,
136 const GLfloat * cp)
137 {
138 GLuint reg = 0x0;
139 int index;
140
141 for (index = 0; index < fp->const_nr; ++index) {
142 if (fp->constant[index] == cp)
143 break;
144 }
145
146 if (index >= fp->const_nr) {
147 if (index >= R500_US_NUM_CONST_REGS) {
148 ERROR("Out of hw constants!\n");
149 return reg;
150 }
151
152 fp->const_nr++;
153 fp->constant[index] = cp;
154 }
155
156 reg = index | REG_CONSTANT;
157 return reg;
158 }
159
160 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
161 COMPILE_STATE;
162 GLuint reg;
163 switch (src.File) {
164 case PROGRAM_TEMPORARY:
165 reg = src.Index + fp->temp_reg_offset;
166 break;
167 case PROGRAM_INPUT:
168 reg = cs->inputs[src.Index].reg;
169 break;
170 case PROGRAM_LOCAL_PARAM:
171 reg = emit_const4fv(fp,
172 fp->mesa_program.Base.LocalParams[src.
173 Index]);
174 break;
175 case PROGRAM_ENV_PARAM:
176 reg = emit_const4fv(fp,
177 fp->ctx->FragmentProgram.Parameters[src.
178 Index]);
179 break;
180 case PROGRAM_STATE_VAR:
181 case PROGRAM_NAMED_PARAM:
182 case PROGRAM_CONSTANT:
183 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
184 ParameterValues[src.Index]);
185 break;
186 default:
187 ERROR("Can't handle src.File %x\n", src.File);
188 reg = 0x0;
189 break;
190 }
191 return reg;
192 }
193
194 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
195 GLuint reg;
196 switch (dest.File) {
197 case PROGRAM_TEMPORARY:
198 reg = dest.Index + fp->temp_reg_offset;
199 break;
200 case PROGRAM_OUTPUT:
201 /* Eventually we may need to handle multiple
202 * rendering targets... */
203 reg = dest.Index;
204 break;
205 default:
206 ERROR("Can't handle dest.File %x\n", dest.File);
207 reg = 0x0;
208 break;
209 }
210 return reg;
211 }
212
213 static void emit_tex(struct r500_fragment_program *fp,
214 struct prog_instruction *fpi, int opcode, int dest, int counter)
215 {
216 int hwsrc, hwdest;
217 GLuint mask;
218
219 mask = fpi->DstReg.WriteMask << 11;
220 hwsrc = make_src(fp, fpi->SrcReg[0]);
221
222 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
223 hwdest = get_temp(fp);
224 } else {
225 hwdest = dest;
226 }
227
228 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
229 | R500_INST_TEX_SEM_WAIT;
230
231 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
232 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
233
234 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
235 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
236
237 switch (opcode) {
238 case OPCODE_KIL:
239 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
240 break;
241 case OPCODE_TEX:
242 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
243 break;
244 case OPCODE_TXB:
245 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
246 break;
247 case OPCODE_TXP:
248 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
249 break;
250 default:
251 ERROR("emit_tex can't handle opcode %x\n", opcode);
252 }
253
254 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
255 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
256 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
257 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
258 | R500_TEX_DST_ADDR(hwdest)
259 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
260 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
261
262 fp->inst[counter].inst3 = 0x0;
263 fp->inst[counter].inst4 = 0x0;
264 fp->inst[counter].inst5 = 0x0;
265
266 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
267 counter++;
268 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
269 | R500_INST_TEX_SEM_WAIT | (mask << 4);
270 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp));
271 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp));
272 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
273 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
274 | R500_ALU_RGB_SEL_B_SRC0
275 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
276 | R500_ALU_RGB_OMOD_DISABLE;
277 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
278 | R500_ALPHA_ADDRD(dest)
279 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
280 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
281 | R500_ALPHA_OMOD_DISABLE;
282 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
283 | R500_ALU_RGBA_ADDRD(dest)
284 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
285 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
286 }
287 }
288
289 static void dumb_shader(struct r500_fragment_program *fp)
290 {
291 fp->inst[0].inst0 = R500_INST_TYPE_TEX
292 | R500_INST_TEX_SEM_WAIT
293 | R500_INST_RGB_WMASK_R
294 | R500_INST_RGB_WMASK_G
295 | R500_INST_RGB_WMASK_B
296 | R500_INST_ALPHA_WMASK
297 | R500_INST_RGB_CLAMP
298 | R500_INST_ALPHA_CLAMP;
299 fp->inst[0].inst1 = R500_TEX_ID(0)
300 | R500_TEX_INST_LD
301 | R500_TEX_SEM_ACQUIRE
302 | R500_TEX_IGNORE_UNCOVERED;
303 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
304 | R500_TEX_SRC_S_SWIZ_R
305 | R500_TEX_SRC_T_SWIZ_G
306 | R500_TEX_DST_ADDR(0)
307 | R500_TEX_DST_R_SWIZ_R
308 | R500_TEX_DST_G_SWIZ_G
309 | R500_TEX_DST_B_SWIZ_B
310 | R500_TEX_DST_A_SWIZ_A;
311 fp->inst[0].inst3 = R500_DX_ADDR(0)
312 | R500_DX_S_SWIZ_R
313 | R500_DX_T_SWIZ_R
314 | R500_DX_R_SWIZ_R
315 | R500_DX_Q_SWIZ_R
316 | R500_DY_ADDR(0)
317 | R500_DY_S_SWIZ_R
318 | R500_DY_T_SWIZ_R
319 | R500_DY_R_SWIZ_R
320 | R500_DY_Q_SWIZ_R;
321 fp->inst[0].inst4 = 0x0;
322 fp->inst[0].inst5 = 0x0;
323
324 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
325 R500_INST_TEX_SEM_WAIT |
326 R500_INST_LAST |
327 R500_INST_RGB_OMASK_R |
328 R500_INST_RGB_OMASK_G |
329 R500_INST_RGB_OMASK_B |
330 R500_INST_ALPHA_OMASK;
331 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
332 R500_RGB_ADDR1(0) |
333 R500_RGB_ADDR1_CONST |
334 R500_RGB_ADDR2(0) |
335 R500_RGB_ADDR2_CONST |
336 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
337 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
338 R500_ALPHA_ADDR1(0) |
339 R500_ALPHA_ADDR1_CONST |
340 R500_ALPHA_ADDR2(0) |
341 R500_ALPHA_ADDR2_CONST |
342 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
343 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
344 R500_ALU_RGB_R_SWIZ_A_R |
345 R500_ALU_RGB_G_SWIZ_A_G |
346 R500_ALU_RGB_B_SWIZ_A_B |
347 R500_ALU_RGB_SEL_B_SRC0 |
348 R500_ALU_RGB_R_SWIZ_B_1 |
349 R500_ALU_RGB_B_SWIZ_B_1 |
350 R500_ALU_RGB_G_SWIZ_B_1;
351 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
352 R500_ALPHA_SWIZ_A_A |
353 R500_ALPHA_SWIZ_B_1;
354 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
355 R500_ALU_RGBA_R_SWIZ_0 |
356 R500_ALU_RGBA_G_SWIZ_0 |
357 R500_ALU_RGBA_B_SWIZ_0 |
358 R500_ALU_RGBA_A_SWIZ_0;
359
360 fp->cs->nrslots = 2;
361 fp->translated = GL_TRUE;
362 }
363
364 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
365 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
366 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
367 /* output_mask */
368 | (fpi->DstReg.WriteMask << 15);
369 } else {
370 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
371 /* pixel_mask */
372 | (fpi->DstReg.WriteMask << 11);
373 }
374
375 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
376 }
377
378 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
379 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
380 * it is technically more accurate and recommended by ATI/AMD. */
381 GLuint src_reg = make_src(fp, src);
382 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
383 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
384 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
385 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
386 | R500_ALU_RGB_SEL_B_SRC0
387 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
388 | R500_ALU_RGB_OMOD_DISABLE;
389 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
390 | R500_ALPHA_ADDRD(dest)
391 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
392 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
393 | R500_ALPHA_OMOD_DISABLE;
394 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
395 | R500_ALU_RGBA_ADDRD(dest)
396 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
397 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
398 }
399
400 static GLboolean parse_program(struct r500_fragment_program *fp)
401 {
402 struct gl_fragment_program *mp = &fp->mesa_program;
403 const struct prog_instruction *inst = mp->Base.Instructions;
404 struct prog_instruction *fpi;
405 GLuint src[3], dest, temp[2];
406 int flags, pixel_mask = 0, output_mask = 0, counter = 0;
407
408 if (!inst || inst[0].Opcode == OPCODE_END) {
409 ERROR("The program is empty!\n");
410 return GL_FALSE;
411 }
412
413 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
414
415 if (fpi->Opcode != OPCODE_KIL) {
416 dest = make_dest(fp, fpi->DstReg);
417
418 pixel_mask = fpi->DstReg.WriteMask << 11;
419 output_mask = fpi->DstReg.WriteMask << 15;
420 }
421
422 switch (fpi->Opcode) {
423 case OPCODE_ABS:
424 emit_alu(fp, counter, fpi);
425 emit_mov(fp, counter, fpi->SrcReg[0], dest);
426 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
427 | R500_ALU_RGB_MOD_B_ABS;
428 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
429 | R500_ALPHA_MOD_B_ABS;
430 break;
431 case OPCODE_ADD:
432 src[0] = make_src(fp, fpi->SrcReg[0]);
433 src[1] = make_src(fp, fpi->SrcReg[1]);
434 /* Variation on MAD: 1*src0+src1 */
435 emit_alu(fp, counter, fpi);
436 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
437 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
438 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
439 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
440 fp->inst[counter].inst3 = /* 1 */
441 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
442 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
443 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
444 | R500_ALPHA_ADDRD(dest)
445 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
446 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
447 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
448 | R500_ALU_RGBA_ADDRD(dest)
449 | R500_ALU_RGBA_SEL_C_SRC1
450 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
451 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
452 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
453 break;
454 case OPCODE_CMP:
455 /* This inst's selects need to be swapped as follows:
456 * 0 -> C ; 1 -> B ; 2 -> A */
457 src[0] = make_src(fp, fpi->SrcReg[0]);
458 src[1] = make_src(fp, fpi->SrcReg[1]);
459 src[2] = make_src(fp, fpi->SrcReg[2]);
460 emit_alu(fp, counter, fpi);
461 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
462 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
463 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
464 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
465 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
466 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
467 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
468 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
469 | R500_ALPHA_ADDRD(dest)
470 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
471 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
472 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
473 | R500_ALU_RGBA_ADDRD(dest)
474 | R500_ALU_RGBA_SEL_C_SRC2
475 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
476 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
477 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
478 break;
479 case OPCODE_COS:
480 src[0] = make_src(fp, fpi->SrcReg[0]);
481 emit_alu(fp, counter, fpi);
482 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
483 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
484 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
485 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
486 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
487 | R500_ALPHA_ADDRD(dest)
488 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
489 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
490 | R500_ALU_RGBA_ADDRD(dest);
491 break;
492 case OPCODE_DP3:
493 src[0] = make_src(fp, fpi->SrcReg[0]);
494 src[1] = make_src(fp, fpi->SrcReg[1]);
495 emit_alu(fp, counter, fpi);
496 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
497 | R500_RGB_ADDR1(src[1]);
498 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
499 | R500_ALPHA_ADDR1(src[1]);
500 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
501 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
502 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
503 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
504 | R500_ALPHA_ADDRD(dest)
505 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
506 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
507 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
508 | R500_ALU_RGBA_ADDRD(dest);
509 break;
510 case OPCODE_DP4:
511 src[0] = make_src(fp, fpi->SrcReg[0]);
512 src[1] = make_src(fp, fpi->SrcReg[1]);
513 /* Based on DP3 */
514 emit_alu(fp, counter, fpi);
515 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
516 | R500_RGB_ADDR1(src[1]);
517 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
518 | R500_ALPHA_ADDR1(src[1]);
519 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
520 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
521 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
522 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
523 | R500_ALPHA_ADDRD(dest)
524 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
525 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
526 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
527 | R500_ALU_RGBA_ADDRD(dest);
528 break;
529 case OPCODE_DPH:
530 src[0] = make_src(fp, fpi->SrcReg[0]);
531 src[1] = make_src(fp, fpi->SrcReg[1]);
532 /* Based on DP3 */
533 emit_alu(fp, counter, fpi);
534 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
535 | R500_RGB_ADDR1(src[1]);
536 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
537 | R500_ALPHA_ADDR1(src[1]);
538 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
539 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
540 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
541 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
542 | R500_ALPHA_ADDRD(dest)
543 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
544 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
545 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
546 | R500_ALU_RGBA_ADDRD(dest);
547 break;
548 case OPCODE_EX2:
549 src[0] = make_src(fp, fpi->SrcReg[0]);
550 emit_alu(fp, counter, fpi);
551 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
552 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
553 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
554 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
555 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
556 | R500_ALPHA_ADDRD(dest)
557 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
558 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
559 | R500_ALU_RGBA_ADDRD(dest);
560 break;
561 case OPCODE_FRC:
562 src[0] = make_src(fp, fpi->SrcReg[0]);
563 emit_alu(fp, counter, fpi);
564 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
565 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
566 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
567 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
568 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
569 | R500_ALPHA_ADDRD(dest)
570 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
571 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
572 | R500_ALU_RGBA_ADDRD(dest);
573 break;
574 case OPCODE_KIL:
575 emit_tex(fp, fpi, OPCODE_KIL, dest, counter);
576 break;
577 case OPCODE_LG2:
578 src[0] = make_src(fp, fpi->SrcReg[0]);
579 emit_alu(fp, counter, fpi);
580 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
581 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
582 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
583 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
584 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
585 | R500_ALPHA_ADDRD(dest)
586 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
587 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
588 | R500_ALU_RGBA_ADDRD(dest);
589 break;
590 case OPCODE_MAD:
591 src[0] = make_src(fp, fpi->SrcReg[0]);
592 src[1] = make_src(fp, fpi->SrcReg[1]);
593 src[2] = make_src(fp, fpi->SrcReg[2]);
594 emit_alu(fp, counter, fpi);
595 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
596 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
597 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
598 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
599 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
600 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
601 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
602 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
603 | R500_ALPHA_ADDRD(dest)
604 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
605 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
606 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
607 | R500_ALU_RGBA_ADDRD(dest)
608 | R500_ALU_RGBA_SEL_C_SRC2
609 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
610 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
611 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
612 break;
613 case OPCODE_MAX:
614 src[0] = make_src(fp, fpi->SrcReg[0]);
615 src[1] = make_src(fp, fpi->SrcReg[1]);
616 emit_alu(fp, counter, fpi);
617 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
618 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
619 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
620 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
621 | R500_ALU_RGB_SEL_B_SRC1
622 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
623 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
624 | R500_ALPHA_ADDRD(dest)
625 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
626 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
627 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
628 | R500_ALU_RGBA_ADDRD(dest);
629 break;
630 case OPCODE_MIN:
631 src[0] = make_src(fp, fpi->SrcReg[0]);
632 src[1] = make_src(fp, fpi->SrcReg[1]);
633 emit_alu(fp, counter, fpi);
634 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
635 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
636 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
637 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
638 | R500_ALU_RGB_SEL_B_SRC1
639 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
640 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
641 | R500_ALPHA_ADDRD(dest)
642 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
643 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
644 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
645 | R500_ALU_RGBA_ADDRD(dest);
646 break;
647 case OPCODE_MOV:
648 emit_alu(fp, counter, fpi);
649 emit_mov(fp, counter, fpi->SrcReg[0], dest);
650 break;
651 case OPCODE_MUL:
652 src[0] = make_src(fp, fpi->SrcReg[0]);
653 src[1] = make_src(fp, fpi->SrcReg[1]);
654 /* Variation on MAD: src0*src1+0 */
655 emit_alu(fp, counter, fpi);
656 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
657 | R500_RGB_ADDR1(src[1]);
658 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
659 | R500_ALPHA_ADDR1(src[1]);
660 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
661 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
662 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
663 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
664 | R500_ALPHA_ADDRD(dest)
665 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
666 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
667 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
668 | R500_ALU_RGBA_ADDRD(dest)
669 // | R500_ALU_RGBA_SEL_C_SRC2
670 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
671 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
672 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
673 break;
674 case OPCODE_RCP:
675 src[0] = make_src(fp, fpi->SrcReg[0]);
676 emit_alu(fp, counter, fpi);
677 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
678 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
679 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
680 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
681 fp->inst[counter].inst4 = R500_ALPHA_OP_RCP
682 | R500_ALPHA_ADDRD(dest)
683 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
684 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
685 | R500_ALU_RGBA_ADDRD(dest);
686 break;
687 case OPCODE_RSQ:
688 src[0] = make_src(fp, fpi->SrcReg[0]);
689 emit_alu(fp, counter, fpi);
690 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
691 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
692 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
693 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
694 fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ
695 | R500_ALPHA_ADDRD(dest)
696 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
697 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
698 | R500_ALU_RGBA_ADDRD(dest);
699 break;
700 case OPCODE_SCS:
701 /* TODO: Make this elegant! */
702 /* Do a cosine, then a sine, masking out the channels we want to protect. */
703 src[0] = make_src(fp, fpi->SrcReg[0]);
704 /* Cosine only goes in R (x) channel. */
705 fpi->DstReg.WriteMask = 0x1;
706 emit_alu(fp, counter, fpi);
707 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
708 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
709 | R500_INST_TEX_SEM_WAIT | 0x1 << 14;
710 } else {
711 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
712 | R500_INST_TEX_SEM_WAIT | 0x1 << 11;
713 }
714 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
715 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
716 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
717 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
718 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
719 | R500_ALPHA_ADDRD(dest)
720 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
721 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
722 | R500_ALU_RGBA_ADDRD(dest);
723 counter++;
724 /* Sine only goes in G (y) channel. */
725 fpi->DstReg.WriteMask = 0x2;
726 emit_alu(fp, counter, fpi);
727 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
728 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
729 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
730 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
731 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
732 | R500_ALPHA_ADDRD(dest)
733 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
734 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
735 | R500_ALU_RGBA_ADDRD(dest);
736 /* Put 0 into B,A (z,w) channels.
737 counter++;
738 fpi->DstReg.WriteMask = 0xC;
739 emit_alu(fp, counter, fpi);
740 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
741 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
742 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
743 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
744 | R500_ALU_RGB_SEL_B_SRC0
745 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
746 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
747 | R500_ALPHA_ADDRD(dest)
748 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
749 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
750 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
751 | R500_ALU_RGBA_ADDRD(dest)
752 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
753 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); */
754 break;
755 case OPCODE_SGE:
756 /* We use SRCP, so as a precaution we're
757 * going to set NOP in previous inst, if possible. */
758 /* This inst's selects need to be swapped as follows:
759 * 0 -> C ; 1 -> B ; 2 -> A */
760 src[0] = make_src(fp, fpi->SrcReg[0]);
761 src[1] = make_src(fp, fpi->SrcReg[1]);
762 emit_alu(fp, counter, fpi);
763 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
764 | R500_RGB_ADDR1(src[1])
765 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
766 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
767 | R500_ALPHA_ADDR1(src[1])
768 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
769 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
770 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
771 | R500_ALU_RGB_SEL_B_SRC1
772 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
773 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
774 | R500_ALPHA_ADDRD(dest)
775 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
776 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
777 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
778 | R500_ALU_RGBA_ADDRD(dest)
779 | R500_ALU_RGBA_SEL_C_SRCP
780 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
781 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
782 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
783 break;
784 case OPCODE_SIN:
785 src[0] = make_src(fp, fpi->SrcReg[0]);
786 emit_alu(fp, counter, fpi);
787 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
788 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
789 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
790 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
791 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
792 | R500_ALPHA_ADDRD(dest)
793 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
794 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
795 | R500_ALU_RGBA_ADDRD(dest);
796 break;
797 case OPCODE_SLT:
798 /* We use SRCP, so as a precaution we're
799 * going to set NOP in previous inst, if possible. */
800 /* This inst's selects need to be swapped as follows:
801 * 0 -> C ; 1 -> B ; 2 -> A */
802 src[0] = make_src(fp, fpi->SrcReg[0]);
803 src[1] = make_src(fp, fpi->SrcReg[1]);
804 emit_alu(fp, counter, fpi);
805 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
806 | R500_RGB_ADDR1(src[1])
807 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
808 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
809 | R500_ALPHA_ADDR1(src[1])
810 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
811 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
812 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
813 | R500_ALU_RGB_SEL_B_SRC1
814 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
815 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
816 | R500_ALPHA_ADDRD(dest)
817 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
818 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
819 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
820 | R500_ALU_RGBA_ADDRD(dest)
821 | R500_ALU_RGBA_SEL_C_SRCP
822 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
823 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
824 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
825 break;
826 case OPCODE_SUB:
827 src[0] = make_src(fp, fpi->SrcReg[0]);
828 src[1] = make_src(fp, fpi->SrcReg[1]);
829 /* Variation on MAD: 1*src0-src1 */
830 emit_alu(fp, counter, fpi);
831 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
832 | R500_RGB_ADDR2(src[1]);
833 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
834 | R500_ALPHA_ADDR2(src[1]);
835 fp->inst[counter].inst3 = /* 1 */
836 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
837 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
838 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
839 | R500_ALPHA_ADDRD(dest)
840 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
841 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
842 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
843 | R500_ALU_RGBA_ADDRD(dest)
844 | R500_ALU_RGBA_SEL_C_SRC2
845 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
846 | R500_ALU_RGBA_MOD_C_NEG
847 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
848 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
849 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
850 break;
851 case OPCODE_SWZ:
852 /* TODO: Negation masks! */
853 emit_alu(fp, counter, fpi);
854 emit_mov(fp, counter, fpi->SrcReg[0], dest);
855 break;
856 case OPCODE_TEX:
857 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
858 if (fpi->DstReg.File == PROGRAM_OUTPUT)
859 counter++;
860 break;
861 case OPCODE_TXB:
862 emit_tex(fp, fpi, OPCODE_TXB, dest, counter);
863 if (fpi->DstReg.File == PROGRAM_OUTPUT)
864 counter++;
865 break;
866 case OPCODE_TXP:
867 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
868 if (fpi->DstReg.File == PROGRAM_OUTPUT)
869 counter++;
870 break;
871 default:
872 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
873 break;
874 }
875
876 /* Finishing touches */
877 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
878 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
879 }
880
881 counter++;
882
883 if (fp->error)
884 return GL_FALSE;
885
886 }
887
888 /* Finish him! (If it's an ALU/OUT instruction...) */
889 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
890 fp->inst[counter-1].inst0 |= R500_INST_LAST;
891 } else {
892 /* We still need to put an output inst, right? */
893 WARN_ONCE("Final FP instruction is not an OUT.\n");
894 #if 0
895
896 #endif
897 }
898
899 fp->cs->nrslots = counter;
900
901 fp->max_temp_idx++;
902
903 return GL_TRUE;
904 }
905
906 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
907 {
908 struct r300_pfs_compile_state *cs = NULL;
909 struct gl_fragment_program *mp = &fp->mesa_program;
910 struct prog_instruction *fpi;
911 GLuint InputsRead = mp->Base.InputsRead;
912 GLuint temps_used = 0; /* for fp->temps[] */
913 int i, j;
914
915 /* New compile, reset tracking data */
916 fp->optimization =
917 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
918 fp->translated = GL_FALSE;
919 fp->error = GL_FALSE;
920 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
921 fp->cur_node = 0;
922 fp->first_node_has_tex = 0;
923 fp->const_nr = 0;
924 /* Size of pixel stack, plus 1. */
925 fp->max_temp_idx = 1;
926 /* Temp register offset. */
927 fp->temp_reg_offset = 0;
928 fp->node[0].alu_end = -1;
929 fp->node[0].tex_end = -1;
930
931 _mesa_memset(cs, 0, sizeof(*fp->cs));
932 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
933 for (j = 0; j < 3; j++) {
934 cs->slot[i].vsrc[j] = SRC_CONST;
935 cs->slot[i].ssrc[j] = SRC_CONST;
936 }
937 }
938
939 /* Work out what temps the Mesa inputs correspond to, this must match
940 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
941 * configures itself based on the fragprog's InputsRead
942 *
943 * NOTE: this depends on get_hw_temp() allocating registers in order,
944 * starting from register 0, so we're just going to do that instead.
945 */
946
947 /* Texcoords come first */
948 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
949 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
950 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
951 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
952 fp->temp_reg_offset;
953 fp->temp_reg_offset++;
954 }
955 }
956 InputsRead &= ~FRAG_BITS_TEX_ANY;
957
958 /* fragment position treated as a texcoord */
959 if (InputsRead & FRAG_BIT_WPOS) {
960 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
961 cs->inputs[FRAG_ATTRIB_WPOS].reg =
962 fp->temp_reg_offset;
963 fp->temp_reg_offset++;
964 }
965 InputsRead &= ~FRAG_BIT_WPOS;
966
967 /* Then primary colour */
968 if (InputsRead & FRAG_BIT_COL0) {
969 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
970 cs->inputs[FRAG_ATTRIB_COL0].reg =
971 fp->temp_reg_offset;
972 fp->temp_reg_offset++;
973 }
974 InputsRead &= ~FRAG_BIT_COL0;
975
976 /* Secondary color */
977 if (InputsRead & FRAG_BIT_COL1) {
978 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
979 cs->inputs[FRAG_ATTRIB_COL1].reg =
980 fp->temp_reg_offset;
981 fp->temp_reg_offset++;
982 }
983 InputsRead &= ~FRAG_BIT_COL1;
984
985 /* Anything else */
986 if (InputsRead) {
987 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
988 /* force read from hwreg 0 for now */
989 for (i = 0; i < 32; i++)
990 if (InputsRead & (1 << i))
991 cs->inputs[i].reg = 0;
992 }
993
994 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
995 * That way, we can free up the reg when it's no longer needed
996 */
997 if (!mp->Base.Instructions) {
998 ERROR("No instructions found in program, going to go die now.\n");
999 return;
1000 }
1001
1002 fp->max_temp_idx = fp->temp_reg_offset + 1;
1003
1004 cs->temp_in_use = temps_used;
1005 }
1006
1007 static void update_params(struct r500_fragment_program *fp)
1008 {
1009 struct gl_fragment_program *mp = &fp->mesa_program;
1010
1011 /* Ask Mesa nicely to fill in ParameterValues for us */
1012 if (mp->Base.Parameters)
1013 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1014 }
1015
1016 void r500TranslateFragmentShader(r300ContextPtr r300,
1017 struct r500_fragment_program *fp)
1018 {
1019
1020 struct r300_pfs_compile_state *cs = NULL;
1021
1022 if (!fp->translated) {
1023
1024 /* I need to see what I'm working with! */
1025 fprintf(stderr, "Mesa program:\n");
1026 fprintf(stderr, "-------------\n");
1027 _mesa_print_program(&fp->mesa_program.Base);
1028 fflush(stdout);
1029
1030 init_program(r300, fp);
1031 cs = fp->cs;
1032
1033 if (parse_program(fp) == GL_FALSE) {
1034 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1035 dumb_shader(fp);
1036 fp->inst_offset = 0;
1037 fp->inst_end = cs->nrslots - 1;
1038 return;
1039 }
1040 fp->inst_offset = 0;
1041 fp->inst_end = cs->nrslots - 1;
1042
1043 fp->translated = GL_TRUE;
1044 if (RADEON_DEBUG & DEBUG_PIXEL)
1045 dump_program(fp);
1046
1047
1048 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1049 }
1050
1051 update_params(fp);
1052
1053 }
1054
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \return the name for values 0..7, or NULL for anything else.
 */
static char *toswiz(int swiz_val) {
	static char *const swiz_names[8] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;

	return swiz_names[swiz_val];
}
1068
/**
 * Map an RGBA opcode field value to its printable mnemonic.
 *
 * \param op_val opcode value; 0..12 have names in this table.
 * \return the mnemonic, or "Unknown" for any value not listed.  A valid
 *         string is always returned so the result can be handed straight
 *         to a "%s" conversion (callers mask the field with 0xf, so
 *         13..15 are reachable).
 */
static char *toop(int op_val)
{
	char *str;

	switch (op_val) {
	case 0: str = "MAD"; break;
	case 1: str = "DP3"; break;
	case 2: str = "DP4"; break;
	case 3: str = "D2A"; break;
	case 4: str = "MIN"; break;
	case 5: str = "MAX"; break;
	case 6: str = "Reserved"; break;
	case 7: str = "CND"; break;
	case 8: str = "CMP"; break;
	case 9: str = "FRC"; break;
	case 10: str = "SOP"; break;
	case 11: str = "MDH"; break;
	case 12: str = "MDV"; break;
	default:
		/* Previously left uninitialised, which returned an
		 * indeterminate pointer for unlisted opcodes. */
		str = "Unknown";
		break;
	}
	return str;
}
1089
/**
 * Map a 4-bit alpha opcode field value to its printable mnemonic.
 *
 * \return the mnemonic for values 0..15, or NULL for anything else.
 */
static char *to_alpha_op(int op_val)
{
	static char *const alpha_op_names[16] = {
		"MAD", "DP", "MIN", "MAX",
		"Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ",
		"SIN", "COS", "MDH", "MDV"
	};

	if (op_val >= 0 && op_val < 16)
		return alpha_op_names[op_val];

	return NULL;
}
1113
/**
 * Render a 4-bit write mask as a string of channel letters.
 *
 * Bit 0 is R, bit 1 is G, bit 2 is B and bit 3 is A; alpha is printed
 * first when combined with colour channels.
 *
 * \return the mask string for values 0..15 ("NONE" for 0), or NULL for
 *         anything else.
 */
static char *to_mask(int val)
{
	static char *const mask_names[16] = {
		"NONE", "R", "G", "RG",
		"B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG",
		"AB", "ARB", "AGB", "ARGB"
	};

	if (val >= 0 && val < 16)
		return mask_names[val];

	return NULL;
}
1137
1138 static void dump_program(struct r500_fragment_program *fp)
1139 {
1140 int pc = 0;
1141 int n;
1142 uint32_t inst;
1143 uint32_t inst0;
1144 char *str = NULL;
1145
1146 for (n = 0; n < fp->inst_end+1; n++) {
1147 inst0 = inst = fp->inst[n].inst0;
1148 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1149 switch(inst & 0x3) {
1150 case R500_INST_TYPE_ALU: str = "ALU"; break;
1151 case R500_INST_TYPE_OUT: str = "OUT"; break;
1152 case R500_INST_TYPE_FC: str = "FC"; break;
1153 case R500_INST_TYPE_TEX: str = "TEX"; break;
1154 };
1155 fprintf(stderr,"%s %s %s %s %s ", str,
1156 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1157 inst & R500_INST_LAST ? "LAST" : "",
1158 inst & R500_INST_NOP ? "NOP" : "",
1159 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1160 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1161 to_mask((inst >> 15) & 0xf));
1162
1163 switch(inst0 & 0x3) {
1164 case 0:
1165 case 1:
1166 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1167 inst = fp->inst[n].inst1;
1168
1169 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1170 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1171 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1172 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1173 (inst >> 30));
1174
1175 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1176 inst = fp->inst[n].inst2;
1177 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1178 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1179 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1180 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1181 (inst >> 30));
1182 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1183 inst = fp->inst[n].inst3;
1184 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1185 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1186 (inst >> 11) & 0x3,
1187 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1188 (inst >> 24) & 0x3);
1189
1190
1191 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1192 inst = fp->inst[n].inst4;
1193 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_b_src:%d %s %d\n", to_alpha_op(inst & 0xf),
1194 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1195 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1196 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3);
1197
1198 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1199 inst = fp->inst[n].inst5;
1200 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1201 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1202 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1203 (inst >> 23) & 0x3,
1204 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1205 break;
1206 case 2:
1207 break;
1208 case 3:
1209 fprintf(stderr,"1: TEX INST 0x%08x\n", fp->inst[n].inst1);
1210 fprintf(stderr,"2: TEX ADDR 0x%08x\n", fp->inst[n].inst2);
1211 fprintf(stderr,"2: TEX ADDR DXDY 0x%08x\n", fp->inst[n].inst3);
1212 break;
1213 }
1214 fprintf(stderr,"\n");
1215 }
1216
1217 }