r5xx: Fixup SOP insts.
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
/*
 * Useful macros and values
 */

/* Print a compile error to stderr, prefixed with the file and function
 * name, and mark the program as failed. A variable named `fp` with an
 * `error` member must be in scope wherever this is used. */
#define ERROR(fmt, args...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##args); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares `cs` as a shorthand for fp->cs; `fp` must be in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Packed RGB swizzles: one 3-bit selector per channel, R in the low bits. */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/* The MAKE_SWIZ_* helpers move a raw selector value into its field.
 * The argument is parenthesized so that an expression such as `a | b`
 * is shifted as a whole rather than only its last operand. */

/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
95
96 static void dump_program(struct r500_fragment_program *fp);
97
98 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
99 GLuint swiz = 0x0;
100 GLuint temp;
101 /* This could be optimized, but it should be plenty fast already. */
102 int i;
103 for (i = 0; i < 3; i++) {
104 temp = GET_SWZ(src.Swizzle, i);
105 /* Fix SWIZZLE_ONE */
106 if (temp == 5) temp++;
107 swiz += temp << i*3;
108 }
109 return swiz;
110 }
111
112 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
113 GLuint swiz = GET_SWZ(src.Swizzle, 3);
114
115 if (swiz == 5) swiz++;
116 return swiz;
117 }
118
119 static inline GLuint make_sop_swizzle(struct prog_src_register src) {
120 GLuint swiz = GET_SWZ(src.Swizzle, 0);
121
122 if (swiz == 5) swiz++;
123 return swiz;
124 }
125
126 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
127 GLuint swiz = 0x0;
128 GLuint temp = src.Swizzle;
129 int i;
130 for (i = 0; i < 4; i++) {
131 swiz += (temp & 0x3) << i*2;
132 temp >>= 3;
133 }
134 return swiz;
135 }
136
137 static int get_temp(struct r500_fragment_program *fp) {
138 return fp->max_temp_idx + 1;
139 }
140
141 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
142 static GLuint emit_const4fv(struct r500_fragment_program *fp,
143 const GLfloat * cp)
144 {
145 GLuint reg = 0x0;
146 int index;
147
148 for (index = 0; index < fp->const_nr; ++index) {
149 if (fp->constant[index] == cp)
150 break;
151 }
152
153 if (index >= fp->const_nr) {
154 if (index >= R500_US_NUM_CONST_REGS) {
155 ERROR("Out of hw constants!\n");
156 return reg;
157 }
158
159 fp->const_nr++;
160 fp->constant[index] = cp;
161 }
162
163 reg = index | REG_CONSTANT;
164 return reg;
165 }
166
167 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
168 COMPILE_STATE;
169 GLuint reg;
170 switch (src.File) {
171 case PROGRAM_TEMPORARY:
172 reg = src.Index + fp->temp_reg_offset;
173 break;
174 case PROGRAM_INPUT:
175 reg = cs->inputs[src.Index].reg;
176 break;
177 case PROGRAM_LOCAL_PARAM:
178 reg = emit_const4fv(fp,
179 fp->mesa_program.Base.LocalParams[src.
180 Index]);
181 break;
182 case PROGRAM_ENV_PARAM:
183 reg = emit_const4fv(fp,
184 fp->ctx->FragmentProgram.Parameters[src.
185 Index]);
186 break;
187 case PROGRAM_STATE_VAR:
188 case PROGRAM_NAMED_PARAM:
189 case PROGRAM_CONSTANT:
190 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
191 ParameterValues[src.Index]);
192 break;
193 default:
194 ERROR("Can't handle src.File %x\n", src.File);
195 reg = 0x0;
196 break;
197 }
198 return reg;
199 }
200
201 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
202 GLuint reg;
203 switch (dest.File) {
204 case PROGRAM_TEMPORARY:
205 reg = dest.Index + fp->temp_reg_offset;
206 break;
207 case PROGRAM_OUTPUT:
208 /* Eventually we may need to handle multiple
209 * rendering targets... */
210 reg = dest.Index;
211 break;
212 default:
213 ERROR("Can't handle dest.File %x\n", dest.File);
214 reg = 0x0;
215 break;
216 }
217 return reg;
218 }
219
220 static void emit_tex(struct r500_fragment_program *fp,
221 struct prog_instruction *fpi, int opcode, int dest, int counter)
222 {
223 int hwsrc, hwdest;
224 GLuint mask;
225
226 mask = fpi->DstReg.WriteMask << 11;
227 hwsrc = make_src(fp, fpi->SrcReg[0]);
228
229 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
230 hwdest = get_temp(fp);
231 } else {
232 hwdest = dest;
233 }
234
235 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
236 | R500_INST_TEX_SEM_WAIT;
237
238 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
239 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
240
241 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
242 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
243
244 switch (opcode) {
245 case OPCODE_KIL:
246 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
247 break;
248 case OPCODE_TEX:
249 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
250 break;
251 case OPCODE_TXB:
252 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
253 break;
254 case OPCODE_TXP:
255 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
256 break;
257 default:
258 ERROR("emit_tex can't handle opcode %x\n", opcode);
259 }
260
261 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
262 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
263 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
264 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
265 | R500_TEX_DST_ADDR(hwdest)
266 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
267 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
268
269 fp->inst[counter].inst3 = 0x0;
270 fp->inst[counter].inst4 = 0x0;
271 fp->inst[counter].inst5 = 0x0;
272
273 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
274 counter++;
275 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
276 | R500_INST_TEX_SEM_WAIT | (mask << 4);
277 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp));
278 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp));
279 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
280 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
281 | R500_ALU_RGB_SEL_B_SRC0
282 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
283 | R500_ALU_RGB_OMOD_DISABLE;
284 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
285 | R500_ALPHA_ADDRD(dest)
286 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
287 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
288 | R500_ALPHA_OMOD_DISABLE;
289 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
290 | R500_ALU_RGBA_ADDRD(dest)
291 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
292 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
293 }
294 }
295
296 static void dumb_shader(struct r500_fragment_program *fp)
297 {
298 fp->inst[0].inst0 = R500_INST_TYPE_TEX
299 | R500_INST_TEX_SEM_WAIT
300 | R500_INST_RGB_WMASK_R
301 | R500_INST_RGB_WMASK_G
302 | R500_INST_RGB_WMASK_B
303 | R500_INST_ALPHA_WMASK
304 | R500_INST_RGB_CLAMP
305 | R500_INST_ALPHA_CLAMP;
306 fp->inst[0].inst1 = R500_TEX_ID(0)
307 | R500_TEX_INST_LD
308 | R500_TEX_SEM_ACQUIRE
309 | R500_TEX_IGNORE_UNCOVERED;
310 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
311 | R500_TEX_SRC_S_SWIZ_R
312 | R500_TEX_SRC_T_SWIZ_G
313 | R500_TEX_DST_ADDR(0)
314 | R500_TEX_DST_R_SWIZ_R
315 | R500_TEX_DST_G_SWIZ_G
316 | R500_TEX_DST_B_SWIZ_B
317 | R500_TEX_DST_A_SWIZ_A;
318 fp->inst[0].inst3 = R500_DX_ADDR(0)
319 | R500_DX_S_SWIZ_R
320 | R500_DX_T_SWIZ_R
321 | R500_DX_R_SWIZ_R
322 | R500_DX_Q_SWIZ_R
323 | R500_DY_ADDR(0)
324 | R500_DY_S_SWIZ_R
325 | R500_DY_T_SWIZ_R
326 | R500_DY_R_SWIZ_R
327 | R500_DY_Q_SWIZ_R;
328 fp->inst[0].inst4 = 0x0;
329 fp->inst[0].inst5 = 0x0;
330
331 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
332 R500_INST_TEX_SEM_WAIT |
333 R500_INST_LAST |
334 R500_INST_RGB_OMASK_R |
335 R500_INST_RGB_OMASK_G |
336 R500_INST_RGB_OMASK_B |
337 R500_INST_ALPHA_OMASK;
338 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
339 R500_RGB_ADDR1(0) |
340 R500_RGB_ADDR1_CONST |
341 R500_RGB_ADDR2(0) |
342 R500_RGB_ADDR2_CONST |
343 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
344 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
345 R500_ALPHA_ADDR1(0) |
346 R500_ALPHA_ADDR1_CONST |
347 R500_ALPHA_ADDR2(0) |
348 R500_ALPHA_ADDR2_CONST |
349 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
350 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
351 R500_ALU_RGB_R_SWIZ_A_R |
352 R500_ALU_RGB_G_SWIZ_A_G |
353 R500_ALU_RGB_B_SWIZ_A_B |
354 R500_ALU_RGB_SEL_B_SRC0 |
355 R500_ALU_RGB_R_SWIZ_B_1 |
356 R500_ALU_RGB_B_SWIZ_B_1 |
357 R500_ALU_RGB_G_SWIZ_B_1;
358 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
359 R500_ALPHA_SWIZ_A_A |
360 R500_ALPHA_SWIZ_B_1;
361 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
362 R500_ALU_RGBA_R_SWIZ_0 |
363 R500_ALU_RGBA_G_SWIZ_0 |
364 R500_ALU_RGBA_B_SWIZ_0 |
365 R500_ALU_RGBA_A_SWIZ_0;
366
367 fp->cs->nrslots = 2;
368 fp->translated = GL_TRUE;
369 }
370
371 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
372 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
373 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
374 /* output_mask */
375 | (fpi->DstReg.WriteMask << 15);
376 } else {
377 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
378 /* pixel_mask */
379 | (fpi->DstReg.WriteMask << 11);
380 }
381
382 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
383 }
384
385 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
386 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
387 * it is technically more accurate and recommended by ATI/AMD. */
388 GLuint src_reg = make_src(fp, src);
389 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
390 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
391 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
392 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
393 | R500_ALU_RGB_SEL_B_SRC0
394 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
395 | R500_ALU_RGB_OMOD_DISABLE;
396 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
397 | R500_ALPHA_ADDRD(dest)
398 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
399 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
400 | R500_ALPHA_OMOD_DISABLE;
401 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
402 | R500_ALU_RGBA_ADDRD(dest)
403 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
404 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
405 }
406
407 static GLboolean parse_program(struct r500_fragment_program *fp)
408 {
409 struct gl_fragment_program *mp = &fp->mesa_program;
410 const struct prog_instruction *inst = mp->Base.Instructions;
411 struct prog_instruction *fpi;
412 GLuint src[3], dest, temp[2];
413 int flags, pixel_mask = 0, output_mask = 0, counter = 0;
414
415 if (!inst || inst[0].Opcode == OPCODE_END) {
416 ERROR("The program is empty!\n");
417 return GL_FALSE;
418 }
419
420 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
421
422 if (fpi->Opcode != OPCODE_KIL) {
423 dest = make_dest(fp, fpi->DstReg);
424
425 pixel_mask = fpi->DstReg.WriteMask << 11;
426 output_mask = fpi->DstReg.WriteMask << 15;
427 }
428
429 switch (fpi->Opcode) {
430 case OPCODE_ABS:
431 emit_alu(fp, counter, fpi);
432 emit_mov(fp, counter, fpi->SrcReg[0], dest);
433 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
434 | R500_ALU_RGB_MOD_B_ABS;
435 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
436 | R500_ALPHA_MOD_B_ABS;
437 break;
438 case OPCODE_ADD:
439 src[0] = make_src(fp, fpi->SrcReg[0]);
440 src[1] = make_src(fp, fpi->SrcReg[1]);
441 /* Variation on MAD: 1*src0+src1 */
442 emit_alu(fp, counter, fpi);
443 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
444 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
445 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
446 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
447 fp->inst[counter].inst3 = /* 1 */
448 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
449 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
450 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
451 | R500_ALPHA_ADDRD(dest)
452 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
453 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
454 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
455 | R500_ALU_RGBA_ADDRD(dest)
456 | R500_ALU_RGBA_SEL_C_SRC1
457 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
458 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
459 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
460 break;
461 case OPCODE_CMP:
462 /* This inst's selects need to be swapped as follows:
463 * 0 -> C ; 1 -> B ; 2 -> A */
464 src[0] = make_src(fp, fpi->SrcReg[0]);
465 src[1] = make_src(fp, fpi->SrcReg[1]);
466 src[2] = make_src(fp, fpi->SrcReg[2]);
467 emit_alu(fp, counter, fpi);
468 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
469 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
470 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
471 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
472 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
473 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
474 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
475 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
476 | R500_ALPHA_ADDRD(dest)
477 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
478 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
479 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
480 | R500_ALU_RGBA_ADDRD(dest)
481 | R500_ALU_RGBA_SEL_C_SRC2
482 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
483 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
484 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
485 break;
486 case OPCODE_COS:
487 src[0] = make_src(fp, fpi->SrcReg[0]);
488 emit_alu(fp, counter, fpi);
489 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
490 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
491 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
492 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
493 | R500_ALPHA_ADDRD(dest)
494 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
495 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
496 | R500_ALU_RGBA_ADDRD(dest);
497 break;
498 case OPCODE_DP3:
499 src[0] = make_src(fp, fpi->SrcReg[0]);
500 src[1] = make_src(fp, fpi->SrcReg[1]);
501 emit_alu(fp, counter, fpi);
502 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
503 | R500_RGB_ADDR1(src[1]);
504 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
505 | R500_ALPHA_ADDR1(src[1]);
506 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
507 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
508 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
509 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
510 | R500_ALPHA_ADDRD(dest)
511 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
512 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
513 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
514 | R500_ALU_RGBA_ADDRD(dest);
515 break;
516 case OPCODE_DP4:
517 src[0] = make_src(fp, fpi->SrcReg[0]);
518 src[1] = make_src(fp, fpi->SrcReg[1]);
519 /* Based on DP3 */
520 emit_alu(fp, counter, fpi);
521 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
522 | R500_RGB_ADDR1(src[1]);
523 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
524 | R500_ALPHA_ADDR1(src[1]);
525 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
526 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
527 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
528 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
529 | R500_ALPHA_ADDRD(dest)
530 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
531 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
532 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
533 | R500_ALU_RGBA_ADDRD(dest);
534 break;
535 case OPCODE_DPH:
536 src[0] = make_src(fp, fpi->SrcReg[0]);
537 src[1] = make_src(fp, fpi->SrcReg[1]);
538 /* Based on DP3 */
539 emit_alu(fp, counter, fpi);
540 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
541 | R500_RGB_ADDR1(src[1]);
542 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
543 | R500_ALPHA_ADDR1(src[1]);
544 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
545 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
546 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
547 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
548 | R500_ALPHA_ADDRD(dest)
549 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
550 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
551 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
552 | R500_ALU_RGBA_ADDRD(dest);
553 break;
554 case OPCODE_EX2:
555 src[0] = make_src(fp, fpi->SrcReg[0]);
556 emit_alu(fp, counter, fpi);
557 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
558 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
559 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
560 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
561 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
562 | R500_ALPHA_ADDRD(dest)
563 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
564 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
565 | R500_ALU_RGBA_ADDRD(dest);
566 break;
567 case OPCODE_FRC:
568 src[0] = make_src(fp, fpi->SrcReg[0]);
569 emit_alu(fp, counter, fpi);
570 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
571 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
572 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
573 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
574 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
575 | R500_ALPHA_ADDRD(dest)
576 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
577 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
578 | R500_ALU_RGBA_ADDRD(dest);
579 break;
580 case OPCODE_KIL:
581 emit_tex(fp, fpi, OPCODE_KIL, dest, counter);
582 break;
583 case OPCODE_LG2:
584 src[0] = make_src(fp, fpi->SrcReg[0]);
585 emit_alu(fp, counter, fpi);
586 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
587 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
588 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
589 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
590 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
591 | R500_ALPHA_ADDRD(dest)
592 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
593 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
594 | R500_ALU_RGBA_ADDRD(dest);
595 break;
596 case OPCODE_MAD:
597 src[0] = make_src(fp, fpi->SrcReg[0]);
598 src[1] = make_src(fp, fpi->SrcReg[1]);
599 src[2] = make_src(fp, fpi->SrcReg[2]);
600 emit_alu(fp, counter, fpi);
601 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
602 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
603 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
604 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
605 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
606 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
607 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
608 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
609 | R500_ALPHA_ADDRD(dest)
610 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
611 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
612 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
613 | R500_ALU_RGBA_ADDRD(dest)
614 | R500_ALU_RGBA_SEL_C_SRC2
615 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
616 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
617 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
618 break;
619 case OPCODE_MAX:
620 src[0] = make_src(fp, fpi->SrcReg[0]);
621 src[1] = make_src(fp, fpi->SrcReg[1]);
622 emit_alu(fp, counter, fpi);
623 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
624 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
625 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
626 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
627 | R500_ALU_RGB_SEL_B_SRC1
628 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
629 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
630 | R500_ALPHA_ADDRD(dest)
631 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
632 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
633 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
634 | R500_ALU_RGBA_ADDRD(dest);
635 break;
636 case OPCODE_MIN:
637 src[0] = make_src(fp, fpi->SrcReg[0]);
638 src[1] = make_src(fp, fpi->SrcReg[1]);
639 emit_alu(fp, counter, fpi);
640 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
641 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
642 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
643 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
644 | R500_ALU_RGB_SEL_B_SRC1
645 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
646 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
647 | R500_ALPHA_ADDRD(dest)
648 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
649 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
650 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
651 | R500_ALU_RGBA_ADDRD(dest);
652 break;
653 case OPCODE_MOV:
654 emit_alu(fp, counter, fpi);
655 emit_mov(fp, counter, fpi->SrcReg[0], dest);
656 break;
657 case OPCODE_MUL:
658 src[0] = make_src(fp, fpi->SrcReg[0]);
659 src[1] = make_src(fp, fpi->SrcReg[1]);
660 /* Variation on MAD: src0*src1+0 */
661 emit_alu(fp, counter, fpi);
662 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
663 | R500_RGB_ADDR1(src[1]);
664 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
665 | R500_ALPHA_ADDR1(src[1]);
666 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
667 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
668 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
669 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
670 | R500_ALPHA_ADDRD(dest)
671 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
672 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
673 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
674 | R500_ALU_RGBA_ADDRD(dest)
675 // | R500_ALU_RGBA_SEL_C_SRC2
676 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
677 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
678 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
679 break;
680 case OPCODE_RCP:
681 src[0] = make_src(fp, fpi->SrcReg[0]);
682 emit_alu(fp, counter, fpi);
683 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
684 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
685 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
686 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
687 fp->inst[counter].inst4 = R500_ALPHA_OP_RCP
688 | R500_ALPHA_ADDRD(dest)
689 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
690 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
691 | R500_ALU_RGBA_ADDRD(dest);
692 break;
693 case OPCODE_RSQ:
694 src[0] = make_src(fp, fpi->SrcReg[0]);
695 emit_alu(fp, counter, fpi);
696 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
697 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
698 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
699 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
700 fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ
701 | R500_ALPHA_ADDRD(dest)
702 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
703 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
704 | R500_ALU_RGBA_ADDRD(dest);
705 break;
706 case OPCODE_SCS:
707 /* TODO: Make this elegant! */
708 /* Do a cosine, then a sine, masking out the channels we want to protect. */
709 src[0] = make_src(fp, fpi->SrcReg[0]);
710 /* Cosine only goes in R (x) channel. */
711 fpi->DstReg.WriteMask = 0x1;
712 emit_alu(fp, counter, fpi);
713 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
714 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
715 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
716 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
717 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
718 | R500_ALPHA_ADDRD(dest)
719 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
720 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
721 | R500_ALU_RGBA_ADDRD(dest);
722 counter++;
723 /* Sine only goes in G (y) channel. */
724 fpi->DstReg.WriteMask = 0x2;
725 emit_alu(fp, counter, fpi);
726 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
727 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
728 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
729 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
730 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
731 | R500_ALPHA_ADDRD(dest)
732 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
733 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
734 | R500_ALU_RGBA_ADDRD(dest);
735 /* Put 0 into B,A (z,w) channels.
736 counter++;
737 fpi->DstReg.WriteMask = 0xC;
738 emit_alu(fp, counter, fpi);
739 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
740 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
741 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
742 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
743 | R500_ALU_RGB_SEL_B_SRC0
744 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
745 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
746 | R500_ALPHA_ADDRD(dest)
747 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
748 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
749 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
750 | R500_ALU_RGBA_ADDRD(dest)
751 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
752 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); */
753 break;
754 case OPCODE_SGE:
755 /* We use SRCP, so as a precaution we're
756 * going to set NOP in previous inst, if possible. */
757 /* This inst's selects need to be swapped as follows:
758 * 0 -> C ; 1 -> B ; 2 -> A */
759 src[0] = make_src(fp, fpi->SrcReg[0]);
760 src[1] = make_src(fp, fpi->SrcReg[1]);
761 emit_alu(fp, counter, fpi);
762 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
763 | R500_RGB_ADDR1(src[1])
764 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
765 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
766 | R500_ALPHA_ADDR1(src[1])
767 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
768 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
769 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
770 | R500_ALU_RGB_SEL_B_SRC1
771 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
772 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
773 | R500_ALPHA_ADDRD(dest)
774 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
775 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
776 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
777 | R500_ALU_RGBA_ADDRD(dest)
778 | R500_ALU_RGBA_SEL_C_SRCP
779 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
780 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
781 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
782 break;
783 case OPCODE_SIN:
784 src[0] = make_src(fp, fpi->SrcReg[0]);
785 emit_alu(fp, counter, fpi);
786 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
787 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
788 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
789 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
790 | R500_ALPHA_ADDRD(dest)
791 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
792 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
793 | R500_ALU_RGBA_ADDRD(dest);
794 break;
795 case OPCODE_SLT:
796 /* We use SRCP, so as a precaution we're
797 * going to set NOP in previous inst, if possible. */
798 /* This inst's selects need to be swapped as follows:
799 * 0 -> C ; 1 -> B ; 2 -> A */
800 src[0] = make_src(fp, fpi->SrcReg[0]);
801 src[1] = make_src(fp, fpi->SrcReg[1]);
802 emit_alu(fp, counter, fpi);
803 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
804 | R500_RGB_ADDR1(src[1])
805 | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
806 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
807 | R500_ALPHA_ADDR1(src[1])
808 | R500_ALPHA_SRCP_OP_A1_MINUS_A0;
809 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
810 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
811 | R500_ALU_RGB_SEL_B_SRC1
812 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
813 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
814 | R500_ALPHA_ADDRD(dest)
815 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
816 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
817 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
818 | R500_ALU_RGBA_ADDRD(dest)
819 | R500_ALU_RGBA_SEL_C_SRCP
820 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
821 | R500_ALU_RGBA_ALPHA_SEL_C_SRCP
822 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
823 break;
824 case OPCODE_SUB:
825 src[0] = make_src(fp, fpi->SrcReg[0]);
826 src[1] = make_src(fp, fpi->SrcReg[1]);
827 /* Variation on MAD: 1*src0-src1 */
828 emit_alu(fp, counter, fpi);
829 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
830 | R500_RGB_ADDR2(src[1]);
831 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
832 | R500_ALPHA_ADDR2(src[1]);
833 fp->inst[counter].inst3 = /* 1 */
834 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
835 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
836 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
837 | R500_ALPHA_ADDRD(dest)
838 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
839 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
840 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
841 | R500_ALU_RGBA_ADDRD(dest)
842 | R500_ALU_RGBA_SEL_C_SRC2
843 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
844 | R500_ALU_RGBA_MOD_C_NEG
845 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
846 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
847 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
848 break;
849 case OPCODE_SWZ:
850 /* TODO: Negation masks! */
851 emit_alu(fp, counter, fpi);
852 emit_mov(fp, counter, fpi->SrcReg[0], dest);
853 break;
854 case OPCODE_TEX:
855 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
856 if (fpi->DstReg.File == PROGRAM_OUTPUT)
857 counter++;
858 break;
859 case OPCODE_TXB:
860 emit_tex(fp, fpi, OPCODE_TXB, dest, counter);
861 if (fpi->DstReg.File == PROGRAM_OUTPUT)
862 counter++;
863 break;
864 case OPCODE_TXP:
865 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
866 if (fpi->DstReg.File == PROGRAM_OUTPUT)
867 counter++;
868 break;
869 default:
870 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
871 break;
872 }
873
874 /* Finishing touches */
875 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
876 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
877 }
878
879 counter++;
880
881 if (fp->error)
882 return GL_FALSE;
883
884 }
885
886 /* Finish him! (If it's an ALU/OUT instruction...) */
887 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
888 fp->inst[counter-1].inst0 |= R500_INST_LAST;
889 } else {
890 /* We still need to put an output inst, right? */
891 WARN_ONCE("Final FP instruction is not an OUT.\n");
892 #if 0
893
894 #endif
895 }
896
897 fp->cs->nrslots = counter;
898
899 fp->max_temp_idx++;
900
901 return GL_TRUE;
902 }
903
904 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
905 {
906 struct r300_pfs_compile_state *cs = NULL;
907 struct gl_fragment_program *mp = &fp->mesa_program;
908 struct prog_instruction *fpi;
909 GLuint InputsRead = mp->Base.InputsRead;
910 GLuint temps_used = 0; /* for fp->temps[] */
911 int i, j;
912
913 /* New compile, reset tracking data */
914 fp->optimization =
915 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
916 fp->translated = GL_FALSE;
917 fp->error = GL_FALSE;
918 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
919 fp->cur_node = 0;
920 fp->first_node_has_tex = 0;
921 fp->const_nr = 0;
922 /* Size of pixel stack, plus 1. */
923 fp->max_temp_idx = 1;
924 /* Temp register offset. */
925 fp->temp_reg_offset = 0;
926 fp->node[0].alu_end = -1;
927 fp->node[0].tex_end = -1;
928
929 _mesa_memset(cs, 0, sizeof(*fp->cs));
930 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
931 for (j = 0; j < 3; j++) {
932 cs->slot[i].vsrc[j] = SRC_CONST;
933 cs->slot[i].ssrc[j] = SRC_CONST;
934 }
935 }
936
937 /* Work out what temps the Mesa inputs correspond to, this must match
938 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
939 * configures itself based on the fragprog's InputsRead
940 *
941 * NOTE: this depends on get_hw_temp() allocating registers in order,
942 * starting from register 0, so we're just going to do that instead.
943 */
944
945 /* Texcoords come first */
946 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
947 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
948 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
949 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
950 fp->temp_reg_offset;
951 fp->temp_reg_offset++;
952 }
953 }
954 InputsRead &= ~FRAG_BITS_TEX_ANY;
955
956 /* fragment position treated as a texcoord */
957 if (InputsRead & FRAG_BIT_WPOS) {
958 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
959 cs->inputs[FRAG_ATTRIB_WPOS].reg =
960 fp->temp_reg_offset;
961 fp->temp_reg_offset++;
962 }
963 InputsRead &= ~FRAG_BIT_WPOS;
964
965 /* Then primary colour */
966 if (InputsRead & FRAG_BIT_COL0) {
967 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
968 cs->inputs[FRAG_ATTRIB_COL0].reg =
969 fp->temp_reg_offset;
970 fp->temp_reg_offset++;
971 }
972 InputsRead &= ~FRAG_BIT_COL0;
973
974 /* Secondary color */
975 if (InputsRead & FRAG_BIT_COL1) {
976 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
977 cs->inputs[FRAG_ATTRIB_COL1].reg =
978 fp->temp_reg_offset;
979 fp->temp_reg_offset++;
980 }
981 InputsRead &= ~FRAG_BIT_COL1;
982
983 /* Anything else */
984 if (InputsRead) {
985 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
986 /* force read from hwreg 0 for now */
987 for (i = 0; i < 32; i++)
988 if (InputsRead & (1 << i))
989 cs->inputs[i].reg = 0;
990 }
991
992 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
993 * That way, we can free up the reg when it's no longer needed
994 */
995 if (!mp->Base.Instructions) {
996 ERROR("No instructions found in program, going to go die now.\n");
997 return;
998 }
999
1000 fp->max_temp_idx = fp->temp_reg_offset + 1;
1001
1002 cs->temp_in_use = temps_used;
1003 }
1004
1005 static void update_params(struct r500_fragment_program *fp)
1006 {
1007 struct gl_fragment_program *mp = &fp->mesa_program;
1008
1009 /* Ask Mesa nicely to fill in ParameterValues for us */
1010 if (mp->Base.Parameters)
1011 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1012 }
1013
1014 void r500TranslateFragmentShader(r300ContextPtr r300,
1015 struct r500_fragment_program *fp)
1016 {
1017
1018 struct r300_pfs_compile_state *cs = NULL;
1019
1020 if (!fp->translated) {
1021
1022
1023
1024 init_program(r300, fp);
1025 cs = fp->cs;
1026
1027 if (parse_program(fp) == GL_FALSE) {
1028 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1029 dumb_shader(fp);
1030 fp->inst_offset = 0;
1031 fp->inst_end = cs->nrslots - 1;
1032 return;
1033 }
1034 fp->inst_offset = 0;
1035 fp->inst_end = cs->nrslots - 1;
1036
1037 fp->translated = GL_TRUE;
1038 if (RADEON_DEBUG & DEBUG_PIXEL) {
1039 dump_program(fp);
1040 fprintf(stderr, "Mesa program:\n");
1041 fprintf(stderr, "-------------\n");
1042 _mesa_print_program(&fp->mesa_program.Base);
1043 fflush(stdout);
1044 }
1045
1046
1047 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1048 }
1049
1050 update_params(fp);
1051
1052 }
1053
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \param swiz_val selector value, 0..7
 * \return static string naming the selector, or NULL if out of range
 */
static char *toswiz(int swiz_val)
{
	static char *const swiz_names[] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U",
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;

	return swiz_names[swiz_val];
}
1067
/**
 * Map the 4-bit RGBA opcode field to its printable mnemonic.
 *
 * Only values 0..12 have a mnemonic here.  The caller in dump_program()
 * masks the field with 0xf, so 13..15 can reach this function; those (and
 * any other unlisted value) yield a printable placeholder so the result is
 * always safe to pass to a "%s" conversion.
 *
 * \param op_val opcode field value
 * \return static string; never NULL
 */
static char *toop(int op_val)
{
	switch (op_val) {
	case 0: return "MAD";
	case 1: return "DP3";
	case 2: return "DP4";
	case 3: return "D2A";
	case 4: return "MIN";
	case 5: return "MAX";
	case 6: return "Reserved";
	case 7: return "CND";
	case 8: return "CMP";
	case 9: return "FRC";
	case 10: return "SOP";
	case 11: return "MDH";
	case 12: return "MDV";
	default: return "Unknown";
	}
}
1088
/**
 * Map the 4-bit alpha opcode field to its printable mnemonic.
 *
 * \param op_val opcode field value, 0..15
 * \return static string naming the opcode, or NULL if out of range
 */
static char *to_alpha_op(int op_val)
{
	static char *const alpha_op_names[] = {
		"MAD", "DP", "MIN", "MAX",
		"Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ",
		"SIN", "COS", "MDH", "MDV",
	};

	if (op_val < 0 || op_val > 15)
		return NULL;

	return alpha_op_names[op_val];
}
1112
/**
 * Map a 4-bit write mask to a printable channel list.
 *
 * Bit 0 selects R, bit 1 G, bit 2 B and bit 3 A; when A is set it is
 * printed first (e.g. 9 -> "AR").
 *
 * \param val mask value, 0..15
 * \return static string for the mask, or NULL if out of range
 */
static char *to_mask(int val)
{
	static char *const mask_names[] = {
		"NONE", "R", "G", "RG",
		"B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG",
		"AB", "ARB", "AGB", "ARGB",
	};

	if (val < 0 || val > 15)
		return NULL;

	return mask_names[val];
}
1136
1137 static void dump_program(struct r500_fragment_program *fp)
1138 {
1139 int pc = 0;
1140 int n;
1141 uint32_t inst;
1142 uint32_t inst0;
1143 char *str = NULL;
1144
1145 for (n = 0; n < fp->inst_end+1; n++) {
1146 inst0 = inst = fp->inst[n].inst0;
1147 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1148 switch(inst & 0x3) {
1149 case R500_INST_TYPE_ALU: str = "ALU"; break;
1150 case R500_INST_TYPE_OUT: str = "OUT"; break;
1151 case R500_INST_TYPE_FC: str = "FC"; break;
1152 case R500_INST_TYPE_TEX: str = "TEX"; break;
1153 };
1154 fprintf(stderr,"%s %s %s %s %s ", str,
1155 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1156 inst & R500_INST_LAST ? "LAST" : "",
1157 inst & R500_INST_NOP ? "NOP" : "",
1158 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1159 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1160 to_mask((inst >> 15) & 0xf));
1161
1162 switch(inst0 & 0x3) {
1163 case 0:
1164 case 1:
1165 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1166 inst = fp->inst[n].inst1;
1167
1168 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1169 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1170 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1171 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1172 (inst >> 30));
1173
1174 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1175 inst = fp->inst[n].inst2;
1176 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1177 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1178 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1179 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1180 (inst >> 30));
1181 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1182 inst = fp->inst[n].inst3;
1183 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1184 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1185 (inst >> 11) & 0x3,
1186 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1187 (inst >> 24) & 0x3);
1188
1189
1190 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1191 inst = fp->inst[n].inst4;
1192 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d\n", to_alpha_op(inst & 0xf),
1193 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1194 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1195 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3);
1196
1197 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1198 inst = fp->inst[n].inst5;
1199 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1200 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1201 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1202 (inst >> 23) & 0x3,
1203 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1204 break;
1205 case 2:
1206 break;
1207 case 3:
1208 fprintf(stderr,"1: TEX INST 0x%08x\n", fp->inst[n].inst1);
1209 fprintf(stderr,"2: TEX ADDR 0x%08x\n", fp->inst[n].inst2);
1210 fprintf(stderr,"2: TEX ADDR DXDY 0x%08x\n", fp->inst[n].inst3);
1211 break;
1212 }
1213 fprintf(stderr,"\n");
1214 }
1215
1216 }