r5xx: Add emit_mad() for FP.
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
57 /*
58 * Useful macros and values
59 */
/*
 * Report a compile error: prints "<file>::<function>(): <message>" plus a
 * newline to stderr and sets fp->error.  A variable named `fp` must be in
 * scope wherever this macro is expanded.
 */
#define ERROR(fmt, args...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##args); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares a local `cs` aliasing fp->cs; requires `fp` in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* Limits checked by get_temp() and emit_const4fv() respectively. */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags (REG_CONSTANT is OR-ed into indices by emit_const4fv()) */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three 3-bit selectors packed at bits 0, 3 and 6. */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/*
 * Field-placement helpers.  The argument is parenthesised so that compound
 * expressions such as MAKE_SWIZ_RGB_A(a | b) shift the whole value rather
 * than only its right-most operand.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)

/* Writemasks */
#define R500_WRITEMASK_ARGB 0xF
98
99 /* 1/(2pi), needed for quick modulus in trig insts
100 * Thanks to glisse for pointing out how to do it! */
101 static const GLfloat RCP_2PI[] = {0.15915494309189535,
102 0.15915494309189535,
103 0.15915494309189535,
104 0.15915494309189535};
105
106 static void dump_program(struct r500_fragment_program *fp);
107
108 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
109 GLuint swiz = 0x0;
110 GLuint temp;
111 /* This could be optimized, but it should be plenty fast already. */
112 int i;
113 for (i = 0; i < 3; i++) {
114 temp = GET_SWZ(src.Swizzle, i);
115 /* Fix SWIZZLE_ONE */
116 if (temp == 5) temp++;
117 swiz += temp << i*3;
118 }
119 return swiz;
120 }
121
122 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
123 GLuint swiz = GET_SWZ(src.Swizzle, 3);
124
125 if (swiz == 5) swiz++;
126 return swiz;
127 }
128
129 static inline GLuint make_sop_swizzle(struct prog_src_register src) {
130 GLuint swiz = GET_SWZ(src.Swizzle, 0);
131
132 if (swiz == 5) swiz++;
133 return swiz;
134 }
135
136 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
137 GLuint swiz = 0x0;
138 GLuint temp = src.Swizzle;
139 int i;
140 for (i = 0; i < 4; i++) {
141 swiz += (temp & 0x3) << i*2;
142 temp >>= 3;
143 }
144 return swiz;
145 }
146
147 static int get_temp(struct r500_fragment_program *fp, int slot) {
148
149 COMPILE_STATE;
150
151 int r = cs->temp_in_use + 1 + slot;
152
153 if (r > R500_US_NUM_TEMP_REGS) {
154 ERROR("Too many temporary registers requested, can't compile!\n");
155 }
156
157 return r;
158 }
159
160 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
161 static GLuint emit_const4fv(struct r500_fragment_program *fp,
162 const GLfloat * cp)
163 {
164 GLuint reg = 0x0;
165 int index;
166
167 for (index = 0; index < fp->const_nr; ++index) {
168 if (fp->constant[index] == cp)
169 break;
170 }
171
172 if (index >= fp->const_nr) {
173 if (index >= R500_US_NUM_CONST_REGS) {
174 ERROR("Out of hw constants!\n");
175 return reg;
176 }
177
178 fp->const_nr++;
179 fp->constant[index] = cp;
180 }
181
182 reg = index | REG_CONSTANT;
183 return reg;
184 }
185
186 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
187 COMPILE_STATE;
188 GLuint reg;
189 switch (src.File) {
190 case PROGRAM_TEMPORARY:
191 reg = src.Index + fp->temp_reg_offset;
192 break;
193 case PROGRAM_INPUT:
194 reg = cs->inputs[src.Index].reg;
195 break;
196 case PROGRAM_LOCAL_PARAM:
197 reg = emit_const4fv(fp,
198 fp->mesa_program.Base.LocalParams[src.
199 Index]);
200 break;
201 case PROGRAM_ENV_PARAM:
202 reg = emit_const4fv(fp,
203 fp->ctx->FragmentProgram.Parameters[src.
204 Index]);
205 break;
206 case PROGRAM_STATE_VAR:
207 case PROGRAM_NAMED_PARAM:
208 case PROGRAM_CONSTANT:
209 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
210 ParameterValues[src.Index]);
211 break;
212 default:
213 ERROR("Can't handle src.File %x\n", src.File);
214 reg = 0x0;
215 break;
216 }
217 return reg;
218 }
219
220 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
221 GLuint reg;
222 switch (dest.File) {
223 case PROGRAM_TEMPORARY:
224 reg = dest.Index + fp->temp_reg_offset;
225 break;
226 case PROGRAM_OUTPUT:
227 /* Eventually we may need to handle multiple
228 * rendering targets... */
229 reg = dest.Index;
230 break;
231 default:
232 ERROR("Can't handle dest.File %x\n", dest.File);
233 reg = 0x0;
234 break;
235 }
236 return reg;
237 }
238
239 static void emit_tex(struct r500_fragment_program *fp,
240 struct prog_instruction *fpi, int dest, int counter)
241 {
242 int hwsrc, hwdest;
243 GLuint mask;
244
245 mask = fpi->DstReg.WriteMask << 11;
246 hwsrc = make_src(fp, fpi->SrcReg[0]);
247
248 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
249 hwdest = get_temp(fp, 0);
250 } else {
251 hwdest = dest;
252 }
253
254 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
255 | R500_INST_TEX_SEM_WAIT;
256
257 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
258 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
259
260 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
261 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
262
263 switch (fpi->Opcode) {
264 case OPCODE_KIL:
265 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
266 break;
267 case OPCODE_TEX:
268 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
269 break;
270 case OPCODE_TXB:
271 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
272 break;
273 case OPCODE_TXP:
274 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
275 break;
276 default:
277 ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode);
278 }
279
280 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
281 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
282 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
283 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
284 | R500_TEX_DST_ADDR(hwdest)
285 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
286 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
287
288 fp->inst[counter].inst3 = 0x0;
289 fp->inst[counter].inst4 = 0x0;
290 fp->inst[counter].inst5 = 0x0;
291
292 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
293 counter++;
294 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
295 | R500_INST_TEX_SEM_WAIT | (mask << 4);
296 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
297 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
298 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
299 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
300 | R500_ALU_RGB_SEL_B_SRC0
301 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
302 | R500_ALU_RGB_OMOD_DISABLE;
303 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
304 | R500_ALPHA_ADDRD(dest)
305 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
306 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
307 | R500_ALPHA_OMOD_DISABLE;
308 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
309 | R500_ALU_RGBA_ADDRD(dest)
310 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
311 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
312 }
313 }
314
315 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
316 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
317 fp->inst[counter].inst0 = R500_INST_TYPE_OUT;
318
319 if (fpi->DstReg.Index == FRAG_RESULT_COLR)
320 fp->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15);
321
322 if (fpi->DstReg.Index == FRAG_RESULT_DEPR)
323 fp->inst[counter].inst4 = R500_ALPHA_W_OMASK;
324 } else {
325 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
326 /* pixel_mask */
327 | (fpi->DstReg.WriteMask << 11);
328 }
329
330 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
331
332 /* Ideally, we shouldn't have to explicitly clear memory here! */
333 fp->inst[counter].inst1 = 0x0;
334 fp->inst[counter].inst2 = 0x0;
335 fp->inst[counter].inst3 = 0x0;
336 fp->inst[counter].inst5 = 0x0;
337 }
338
339 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
340 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
341 * it is technically more accurate and recommended by ATI/AMD. */
342 GLuint src_reg = make_src(fp, src);
343 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
344 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
345 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
346 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
347 | R500_ALU_RGB_SEL_B_SRC0
348 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
349 | R500_ALU_RGB_OMOD_DISABLE;
350 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
351 | R500_ALPHA_ADDRD(dest)
352 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
353 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
354 | R500_ALPHA_OMOD_DISABLE;
355 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
356 | R500_ALU_RGBA_ADDRD(dest)
357 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
358 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
359 }
360
361 static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int one, int two, int three) {
362 /* Note: This code was all Corbin's. Corbin is a rather hackish coder.
363 * If you can make it pretty or fast, please do so! */
364 emit_alu(fp, counter, fpi);
365 /* Common MAD stuff */
366 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
367 | R500_ALPHA_ADDRD(make_dest(fp, fpi->DstReg));
368 fp->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD
369 | R500_ALU_RGBA_ADDRD(make_dest(fp, fpi->DstReg));
370 switch (one) {
371 case 0:
372 case 1:
373 case 2:
374 fp->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(fp, fpi->SrcReg[one]));
375 fp->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(fp, fpi->SrcReg[one]));
376 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0
377 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one]));
378 fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0
379 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one]));
380 break;
381 default:
382 WARN_ONCE("Bad src index in emit_mad: %d\n", one);
383 break;
384 }
385 switch (two) {
386 case 0:
387 case 1:
388 case 2:
389 fp->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(fp, fpi->SrcReg[two]));
390 fp->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(fp, fpi->SrcReg[two]));
391 fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
392 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two]));
393 fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1
394 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two]));
395 break;
396 default:
397 WARN_ONCE("Bad src index in emit_mad: %d\n", one);
398 break;
399 }
400 switch (three) {
401 case 0:
402 case 1:
403 case 2:
404 fp->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(fp, fpi->SrcReg[three]));
405 fp->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(fp, fpi->SrcReg[three]));
406 fp->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2
407 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three]))
408 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
409 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three]));
410 break;
411 default:
412 WARN_ONCE("Bad src index in emit_mad: %d\n", one);
413 break;
414 }
415 }
416
417 static GLboolean parse_program(struct r500_fragment_program *fp)
418 {
419 struct gl_fragment_program *mp = &fp->mesa_program;
420 const struct prog_instruction *inst = mp->Base.Instructions;
421 struct prog_instruction *fpi;
422 GLuint src[3], dest = 0;
423 int temp_swiz, counter = 0;
424
425 if (!inst || inst[0].Opcode == OPCODE_END) {
426 ERROR("The program is empty!\n");
427 return GL_FALSE;
428 }
429
430 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
431
432 if (fpi->Opcode != OPCODE_KIL) {
433 dest = make_dest(fp, fpi->DstReg);
434 }
435
436 switch (fpi->Opcode) {
437 case OPCODE_ABS:
438 emit_alu(fp, counter, fpi);
439 emit_mov(fp, counter, fpi->SrcReg[0], dest);
440 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
441 | R500_ALU_RGB_MOD_B_ABS;
442 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
443 | R500_ALPHA_MOD_B_ABS;
444 break;
445 case OPCODE_ADD:
446 src[0] = make_src(fp, fpi->SrcReg[0]);
447 src[1] = make_src(fp, fpi->SrcReg[1]);
448 /* Variation on MAD: 1*src0+src1 */
449 emit_alu(fp, counter, fpi);
450 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
451 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
452 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
453 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
454 fp->inst[counter].inst3 = /* 1 */
455 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
456 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
457 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
458 | R500_ALPHA_ADDRD(dest)
459 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
460 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
461 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
462 | R500_ALU_RGBA_ADDRD(dest)
463 | R500_ALU_RGBA_SEL_C_SRC1
464 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
465 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
466 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
467 break;
468 case OPCODE_CMP:
469 /* This inst's selects need to be swapped as follows:
470 * 0 -> C ; 1 -> B ; 2 -> A */
471 src[0] = make_src(fp, fpi->SrcReg[0]);
472 src[1] = make_src(fp, fpi->SrcReg[1]);
473 src[2] = make_src(fp, fpi->SrcReg[2]);
474 emit_alu(fp, counter, fpi);
475 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
476 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
477 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
478 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
479 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
480 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
481 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
482 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
483 | R500_ALPHA_ADDRD(dest)
484 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
485 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
486 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
487 | R500_ALU_RGBA_ADDRD(dest)
488 | R500_ALU_RGBA_SEL_C_SRC2
489 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
490 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
491 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
492 break;
493 case OPCODE_COS:
494 src[0] = make_src(fp, fpi->SrcReg[0]);
495 src[1] = emit_const4fv(fp, RCP_2PI);
496 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
497 | (R500_WRITEMASK_ARGB << 11);
498 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
499 | R500_RGB_ADDR1(src[1]);
500 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
501 | R500_ALPHA_ADDR1(src[1]);
502 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
503 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
504 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
505 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
506 | R500_ALPHA_ADDRD(get_temp(fp, 0))
507 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
508 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
509 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
510 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
511 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
512 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
513 counter++;
514 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
515 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
516 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
517 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
518 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
519 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
520 | R500_ALPHA_ADDRD(get_temp(fp, 1))
521 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
522 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
523 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
524 counter++;
525 emit_alu(fp, counter, fpi);
526 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
527 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
528 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
529 fp->inst[counter].inst4 |= R500_ALPHA_OP_COS
530 | R500_ALPHA_ADDRD(dest)
531 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
532 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
533 | R500_ALU_RGBA_ADDRD(dest);
534 break;
535 case OPCODE_DP3:
536 src[0] = make_src(fp, fpi->SrcReg[0]);
537 src[1] = make_src(fp, fpi->SrcReg[1]);
538 emit_alu(fp, counter, fpi);
539 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
540 | R500_RGB_ADDR1(src[1]);
541 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
542 | R500_ALPHA_ADDR1(src[1]);
543 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
544 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
545 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
546 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
547 | R500_ALPHA_ADDRD(dest)
548 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
549 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
550 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
551 | R500_ALU_RGBA_ADDRD(dest);
552 break;
553 case OPCODE_DP4:
554 src[0] = make_src(fp, fpi->SrcReg[0]);
555 src[1] = make_src(fp, fpi->SrcReg[1]);
556 /* Based on DP3 */
557 emit_alu(fp, counter, fpi);
558 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
559 | R500_RGB_ADDR1(src[1]);
560 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
561 | R500_ALPHA_ADDR1(src[1]);
562 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
563 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
564 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
565 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
566 | R500_ALPHA_ADDRD(dest)
567 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
568 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
569 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
570 | R500_ALU_RGBA_ADDRD(dest);
571 break;
572 case OPCODE_DPH:
573 src[0] = make_src(fp, fpi->SrcReg[0]);
574 src[1] = make_src(fp, fpi->SrcReg[1]);
575 /* Based on DP3 */
576 emit_alu(fp, counter, fpi);
577 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
578 | R500_RGB_ADDR1(src[1]);
579 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
580 | R500_ALPHA_ADDR1(src[1]);
581 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
582 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
583 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
584 fp->inst[counter].inst4 |= R500_ALPHA_OP_DP
585 | R500_ALPHA_ADDRD(dest)
586 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
587 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
588 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
589 | R500_ALU_RGBA_ADDRD(dest);
590 break;
591 case OPCODE_DST:
592 src[0] = make_src(fp, fpi->SrcReg[0]);
593 src[1] = make_src(fp, fpi->SrcReg[1]);
594 /* [1, src0.y*src1.y, src0.z, src1.w]
595 * So basically MUL with lotsa swizzling. */
596 emit_alu(fp, counter, fpi);
597 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
598 | R500_RGB_ADDR1(src[1]);
599 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
600 | R500_ALPHA_ADDR1(src[1]);
601 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
602 | R500_ALU_RGB_SEL_B_SRC1;
603 /* Select [1, y, z, 1] */
604 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE;
605 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz);
606 /* Select [1, y, 1, w] */
607 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6);
608 fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz);
609 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
610 | R500_ALPHA_ADDRD(dest)
611 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
612 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
613 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
614 | R500_ALU_RGBA_ADDRD(dest)
615 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
616 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
617 break;
618 case OPCODE_EX2:
619 src[0] = make_src(fp, fpi->SrcReg[0]);
620 emit_alu(fp, counter, fpi);
621 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
622 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
623 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
624 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
625 fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2
626 | R500_ALPHA_ADDRD(dest)
627 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
628 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
629 | R500_ALU_RGBA_ADDRD(dest);
630 break;
631 case OPCODE_FRC:
632 src[0] = make_src(fp, fpi->SrcReg[0]);
633 emit_alu(fp, counter, fpi);
634 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
635 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
636 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
637 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
638 fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC
639 | R500_ALPHA_ADDRD(dest)
640 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
641 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
642 | R500_ALU_RGBA_ADDRD(dest);
643 break;
644 case OPCODE_LG2:
645 src[0] = make_src(fp, fpi->SrcReg[0]);
646 emit_alu(fp, counter, fpi);
647 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
648 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
649 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
650 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
651 fp->inst[counter].inst4 |= R500_ALPHA_OP_LN2
652 | R500_ALPHA_ADDRD(dest)
653 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
654 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
655 | R500_ALU_RGBA_ADDRD(dest);
656 break;
657 case OPCODE_LRP:
658 /* src0 * src1 + INV(src0) * src2
659 * 1) MUL src0, src1, temp
660 * 2) PRE 1-src0; MAD srcp, src2, temp */
661 src[0] = make_src(fp, fpi->SrcReg[0]);
662 src[1] = make_src(fp, fpi->SrcReg[1]);
663 src[2] = make_src(fp, fpi->SrcReg[2]);
664 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
665 | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11);
666 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
667 | R500_RGB_ADDR1(src[1]);
668 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
669 | R500_ALPHA_ADDR1(src[1]);
670 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
671 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
672 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
673 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
674 | R500_ALPHA_ADDRD(get_temp(fp, 0))
675 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
676 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
677 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
678 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
679 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
680 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
681 counter++;
682 emit_alu(fp, counter, fpi);
683 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
684 | R500_RGB_ADDR1(src[2])
685 | R500_RGB_ADDR2(get_temp(fp, 0))
686 | R500_RGB_SRCP_OP_1_MINUS_RGB0;
687 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
688 | R500_ALPHA_ADDR1(src[2])
689 | R500_ALPHA_ADDR2(get_temp(fp, 0))
690 | R500_ALPHA_SRCP_OP_1_MINUS_A0;
691 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP
692 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
693 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
694 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
695 | R500_ALPHA_ADDRD(dest)
696 | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
697 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
698 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
699 | R500_ALU_RGBA_ADDRD(dest)
700 | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
701 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
702 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
703 break;
704 case OPCODE_MAD:
705 /* src[0] = make_src(fp, fpi->SrcReg[0]);
706 src[1] = make_src(fp, fpi->SrcReg[1]);
707 src[2] = make_src(fp, fpi->SrcReg[2]);
708 emit_alu(fp, counter, fpi);
709 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
710 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
711 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
712 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
713 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
714 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
715 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
716 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
717 | R500_ALPHA_ADDRD(dest)
718 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
719 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
720 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
721 | R500_ALU_RGBA_ADDRD(dest)
722 | R500_ALU_RGBA_SEL_C_SRC2
723 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
724 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
725 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); */
726 emit_mad(fp, counter, fpi, 0, 1, 2);
727 break;
728 case OPCODE_MAX:
729 src[0] = make_src(fp, fpi->SrcReg[0]);
730 src[1] = make_src(fp, fpi->SrcReg[1]);
731 emit_alu(fp, counter, fpi);
732 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
733 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
734 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
735 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
736 | R500_ALU_RGB_SEL_B_SRC1
737 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
738 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAX
739 | R500_ALPHA_ADDRD(dest)
740 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
741 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
742 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
743 | R500_ALU_RGBA_ADDRD(dest);
744 break;
745 case OPCODE_MIN:
746 src[0] = make_src(fp, fpi->SrcReg[0]);
747 src[1] = make_src(fp, fpi->SrcReg[1]);
748 emit_alu(fp, counter, fpi);
749 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
750 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
751 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
752 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
753 | R500_ALU_RGB_SEL_B_SRC1
754 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
755 fp->inst[counter].inst4 |= R500_ALPHA_OP_MIN
756 | R500_ALPHA_ADDRD(dest)
757 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
758 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
759 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
760 | R500_ALU_RGBA_ADDRD(dest);
761 break;
762 case OPCODE_MOV:
763 emit_alu(fp, counter, fpi);
764 emit_mov(fp, counter, fpi->SrcReg[0], dest);
765 break;
766 case OPCODE_MUL:
767 src[0] = make_src(fp, fpi->SrcReg[0]);
768 src[1] = make_src(fp, fpi->SrcReg[1]);
769 /* Variation on MAD: src0*src1+0 */
770 emit_alu(fp, counter, fpi);
771 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
772 | R500_RGB_ADDR1(src[1]);
773 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
774 | R500_ALPHA_ADDR1(src[1]);
775 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
776 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
777 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
778 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
779 | R500_ALPHA_ADDRD(dest)
780 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
781 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
782 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
783 | R500_ALU_RGBA_ADDRD(dest)
784 // | R500_ALU_RGBA_SEL_C_SRC2
785 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
786 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
787 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
788 break;
789 case OPCODE_POW:
790 /* POW(a,b) = EX2(LN2(a)*b) */
791 src[0] = make_src(fp, fpi->SrcReg[0]);
792 src[1] = make_src(fp, fpi->SrcReg[1]);
793 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
794 | (R500_WRITEMASK_ARGB << 11);
795 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
796 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
797 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
798 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
799 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
800 | R500_ALPHA_ADDRD(get_temp(fp, 0))
801 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
802 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
803 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0));
804 counter++;
805 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
806 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0))
807 | R500_RGB_ADDR1(src[1]);
808 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0))
809 | R500_ALPHA_ADDR1(src[1]);
810 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
811 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
812 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
813 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
814 | R500_ALPHA_ADDRD(get_temp(fp, 1))
815 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
816 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
817 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
818 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1))
819 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
820 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
821 counter++;
822 emit_alu(fp, counter, fpi);
823 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
824 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
825 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
826 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
827 fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2
828 | R500_ALPHA_ADDRD(dest)
829 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
830 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
831 | R500_ALU_RGBA_ADDRD(dest);
832 break;
833 case OPCODE_RCP:
834 src[0] = make_src(fp, fpi->SrcReg[0]);
835 emit_alu(fp, counter, fpi);
836 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
837 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
838 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
839 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
840 fp->inst[counter].inst4 |= R500_ALPHA_OP_RCP
841 | R500_ALPHA_ADDRD(dest)
842 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
843 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
844 | R500_ALU_RGBA_ADDRD(dest);
845 break;
846 case OPCODE_RSQ:
847 src[0] = make_src(fp, fpi->SrcReg[0]);
848 emit_alu(fp, counter, fpi);
849 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
850 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
851 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
852 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
853 fp->inst[counter].inst4 |= R500_ALPHA_OP_RSQ
854 | R500_ALPHA_ADDRD(dest)
855 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
856 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
857 | R500_ALU_RGBA_ADDRD(dest);
858 break;
859 case OPCODE_SCS:
860 src[0] = make_src(fp, fpi->SrcReg[0]);
861 src[1] = emit_const4fv(fp, RCP_2PI);
862 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
863 | (R500_WRITEMASK_ARGB << 11);
864 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
865 | R500_RGB_ADDR1(src[1]);
866 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
867 | R500_ALPHA_ADDR1(src[1]);
868 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
869 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
870 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
871 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
872 | R500_ALPHA_ADDRD(get_temp(fp, 0))
873 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
874 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
875 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
876 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
877 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
878 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
879 counter++;
880 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
881 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
882 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
883 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
884 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
885 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
886 | R500_ALPHA_ADDRD(get_temp(fp, 1))
887 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
888 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
889 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
890 counter++;
891 /* Do a cosine, then a sine, masking out the channels we want to protect. */
892 /* Cosine only goes in R (x) channel. */
893 fpi->DstReg.WriteMask = 0x1;
894 emit_alu(fp, counter, fpi);
895 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
896 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
897 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
898 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
899 fp->inst[counter].inst4 |= R500_ALPHA_OP_COS
900 | R500_ALPHA_ADDRD(dest)
901 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
902 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
903 | R500_ALU_RGBA_ADDRD(dest);
904 counter++;
905 /* Sine only goes in G (y) channel. */
906 fpi->DstReg.WriteMask = 0x2;
907 emit_alu(fp, counter, fpi);
908 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
909 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
910 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
911 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
912 fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN
913 | R500_ALPHA_ADDRD(dest)
914 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
915 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
916 | R500_ALU_RGBA_ADDRD(dest);
917 break;
918 case OPCODE_SGE:
919 src[0] = make_src(fp, fpi->SrcReg[0]);
920 src[1] = make_src(fp, fpi->SrcReg[1]);
921 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
922 | (R500_WRITEMASK_ARGB << 11);
923 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
924 | R500_RGB_ADDR2(src[1]);
925 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
926 | R500_ALPHA_ADDR2(src[1]);
927 fp->inst[counter].inst3 = /* 1 */
928 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
929 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
930 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
931 | R500_ALPHA_ADDRD(get_temp(fp, 0))
932 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
933 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
934 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
935 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
936 | R500_ALU_RGBA_SEL_C_SRC2
937 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
938 | R500_ALU_RGBA_MOD_C_NEG
939 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
940 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
941 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
942 counter++;
943 /* This inst's selects need to be swapped as follows:
944 * 0 -> C ; 1 -> B ; 2 -> A */
945 emit_alu(fp, counter, fpi);
946 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
947 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
948 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
949 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
950 | R500_ALU_RGB_SEL_B_SRC0
951 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
952 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
953 | R500_ALPHA_ADDRD(dest)
954 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
955 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
956 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
957 | R500_ALU_RGBA_ADDRD(dest)
958 | R500_ALU_RGBA_SEL_C_SRC0
959 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
960 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
961 | R500_ALU_RGBA_A_SWIZ_A;
962 break;
963 case OPCODE_SIN:
964 src[0] = make_src(fp, fpi->SrcReg[0]);
965 src[1] = emit_const4fv(fp, RCP_2PI);
966 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
967 | (R500_WRITEMASK_ARGB << 11);
968 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
969 | R500_RGB_ADDR1(src[1]);
970 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
971 | R500_ALPHA_ADDR1(src[1]);
972 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
973 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
974 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
975 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
976 | R500_ALPHA_ADDRD(get_temp(fp, 0))
977 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
978 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
979 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
980 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
981 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
982 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
983 counter++;
984 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
985 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
986 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
987 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
988 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
989 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
990 | R500_ALPHA_ADDRD(get_temp(fp, 1))
991 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
992 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
993 | R500_ALU_RGBA_ADDRD(get_temp(fp, 1));
994 counter++;
995 emit_alu(fp, counter, fpi);
996 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1));
997 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1));
998 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0;
999 fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN
1000 | R500_ALPHA_ADDRD(dest)
1001 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_sop_swizzle(fpi->SrcReg[0]));
1002 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
1003 | R500_ALU_RGBA_ADDRD(dest);
1004 break;
1005 case OPCODE_SLT:
1006 src[0] = make_src(fp, fpi->SrcReg[0]);
1007 src[1] = make_src(fp, fpi->SrcReg[1]);
1008 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1009 | (R500_WRITEMASK_ARGB << 11);
1010 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1011 | R500_RGB_ADDR2(src[1]);
1012 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1013 | R500_ALPHA_ADDR2(src[1]);
1014 fp->inst[counter].inst3 = /* 1 */
1015 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1016 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1017 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
1018 | R500_ALPHA_ADDRD(get_temp(fp, 0))
1019 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1020 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1021 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1022 | R500_ALU_RGBA_ADDRD(get_temp(fp, 0))
1023 | R500_ALU_RGBA_SEL_C_SRC2
1024 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1025 | R500_ALU_RGBA_MOD_C_NEG
1026 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1027 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1028 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1029 counter++;
1030 /* This inst's selects need to be swapped as follows:
1031 * 0 -> C ; 1 -> B ; 2 -> A */
1032 emit_alu(fp, counter, fpi);
1033 fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0));
1034 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0));
1035 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1036 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
1037 | R500_ALU_RGB_SEL_B_SRC0
1038 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
1039 fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP
1040 | R500_ALPHA_ADDRD(dest)
1041 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
1042 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
1043 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
1044 | R500_ALU_RGBA_ADDRD(dest)
1045 | R500_ALU_RGBA_SEL_C_SRC0
1046 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1047 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1048 | R500_ALU_RGBA_A_SWIZ_A;
1049 break;
1050 case OPCODE_SUB:
1051 src[0] = make_src(fp, fpi->SrcReg[0]);
1052 src[1] = make_src(fp, fpi->SrcReg[1]);
1053 /* Variation on MAD: 1*src0-src1 */
1054 emit_alu(fp, counter, fpi);
1055 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1056 | R500_RGB_ADDR2(src[1]);
1057 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1058 | R500_ALPHA_ADDR2(src[1]);
1059 fp->inst[counter].inst3 = /* 1 */
1060 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1061 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1062 fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD
1063 | R500_ALPHA_ADDRD(dest)
1064 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1065 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1066 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1067 | R500_ALU_RGBA_ADDRD(dest)
1068 | R500_ALU_RGBA_SEL_C_SRC2
1069 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1070 | R500_ALU_RGBA_MOD_C_NEG
1071 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1072 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1073 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1074 break;
1075 case OPCODE_SWZ:
1076 /* TODO: Negation masks! */
1077 emit_alu(fp, counter, fpi);
1078 emit_mov(fp, counter, fpi->SrcReg[0], dest);
1079 break;
1080 case OPCODE_KIL:
1081 case OPCODE_TEX:
1082 case OPCODE_TXB:
1083 case OPCODE_TXP:
1084 emit_tex(fp, fpi, dest, counter);
1085 if (fpi->DstReg.File == PROGRAM_OUTPUT)
1086 counter++;
1087 break;
1088 default:
1089 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
1090 break;
1091 }
1092
1093 /* Finishing touches */
1094 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
1095 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
1096 }
1097
1098 counter++;
1099
1100 if (fp->error)
1101 return GL_FALSE;
1102
1103 }
1104
1105 /* Finish him! (If it's an ALU/OUT instruction...) */
1106 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
1107 fp->inst[counter-1].inst0 |= R500_INST_LAST;
1108 } else {
1109 /* We still need to put an output inst, right? */
1110 WARN_ONCE("Final FP instruction is not an OUT.\n");
1111 }
1112
1113 fp->cs->nrslots = counter;
1114
1115 fp->max_temp_idx++;
1116
1117 return GL_TRUE;
1118 }
1119
1120 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
1121 {
1122 struct r300_pfs_compile_state *cs = NULL;
1123 struct gl_fragment_program *mp = &fp->mesa_program;
1124 struct prog_instruction *fpi;
1125 GLuint InputsRead = mp->Base.InputsRead;
1126 GLuint temps_used = 0;
1127 int i, j;
1128
1129 /* New compile, reset tracking data */
1130 fp->optimization =
1131 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
1132 fp->translated = GL_FALSE;
1133 fp->error = GL_FALSE;
1134 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
1135 fp->cur_node = 0;
1136 fp->first_node_has_tex = 0;
1137 fp->const_nr = 0;
1138 /* Size of pixel stack, plus 1. */
1139 fp->max_temp_idx = 1;
1140 /* Temp register offset. */
1141 fp->temp_reg_offset = 0;
1142 fp->node[0].alu_end = -1;
1143 fp->node[0].tex_end = -1;
1144
1145 _mesa_memset(cs, 0, sizeof(*fp->cs));
1146 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
1147 for (j = 0; j < 3; j++) {
1148 cs->slot[i].vsrc[j] = SRC_CONST;
1149 cs->slot[i].ssrc[j] = SRC_CONST;
1150 }
1151 }
1152
1153 /* Work out what temps the Mesa inputs correspond to, this must match
1154 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1155 * configures itself based on the fragprog's InputsRead
1156 *
1157 * NOTE: this depends on get_hw_temp() allocating registers in order,
1158 * starting from register 0, so we're just going to do that instead.
1159 */
1160
1161 /* Texcoords come first */
1162 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
1163 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1164 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
1165 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
1166 fp->temp_reg_offset;
1167 fp->temp_reg_offset++;
1168 }
1169 }
1170 InputsRead &= ~FRAG_BITS_TEX_ANY;
1171
1172 /* fragment position treated as a texcoord */
1173 if (InputsRead & FRAG_BIT_WPOS) {
1174 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
1175 cs->inputs[FRAG_ATTRIB_WPOS].reg =
1176 fp->temp_reg_offset;
1177 fp->temp_reg_offset++;
1178 }
1179 InputsRead &= ~FRAG_BIT_WPOS;
1180
1181 /* Then primary colour */
1182 if (InputsRead & FRAG_BIT_COL0) {
1183 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
1184 cs->inputs[FRAG_ATTRIB_COL0].reg =
1185 fp->temp_reg_offset;
1186 fp->temp_reg_offset++;
1187 }
1188 InputsRead &= ~FRAG_BIT_COL0;
1189
1190 /* Secondary color */
1191 if (InputsRead & FRAG_BIT_COL1) {
1192 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
1193 cs->inputs[FRAG_ATTRIB_COL1].reg =
1194 fp->temp_reg_offset;
1195 fp->temp_reg_offset++;
1196 }
1197 InputsRead &= ~FRAG_BIT_COL1;
1198
1199 /* Anything else */
1200 if (InputsRead) {
1201 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
1202 /* force read from hwreg 0 for now */
1203 for (i = 0; i < 32; i++)
1204 if (InputsRead & (1 << i))
1205 cs->inputs[i].reg = 0;
1206 }
1207
1208 if (!mp->Base.Instructions) {
1209 ERROR("No instructions found in program, going to go die now.\n");
1210 return;
1211 }
1212
1213 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
1214 for (i = 0; i < 3; i++) {
1215 if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
1216 if (fpi->SrcReg[i].Index > temps_used)
1217 temps_used = fpi->SrcReg[i].Index;
1218 }
1219 }
1220 }
1221
1222 cs->temp_in_use = temps_used;
1223
1224 fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use + 1;
1225 }
1226
1227 static void update_params(struct r500_fragment_program *fp)
1228 {
1229 struct gl_fragment_program *mp = &fp->mesa_program;
1230
1231 /* Ask Mesa nicely to fill in ParameterValues for us */
1232 if (mp->Base.Parameters)
1233 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
1234 }
1235
1236 static void dumb_shader(struct r500_fragment_program *fp)
1237 {
1238 fp->inst[0].inst0 = R500_INST_TYPE_TEX
1239 | R500_INST_TEX_SEM_WAIT
1240 | R500_INST_RGB_WMASK_R
1241 | R500_INST_RGB_WMASK_G
1242 | R500_INST_RGB_WMASK_B
1243 | R500_INST_ALPHA_WMASK
1244 | R500_INST_RGB_CLAMP
1245 | R500_INST_ALPHA_CLAMP;
1246 fp->inst[0].inst1 = R500_TEX_ID(0)
1247 | R500_TEX_INST_LD
1248 | R500_TEX_SEM_ACQUIRE
1249 | R500_TEX_IGNORE_UNCOVERED;
1250 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
1251 | R500_TEX_SRC_S_SWIZ_R
1252 | R500_TEX_SRC_T_SWIZ_G
1253 | R500_TEX_DST_ADDR(0)
1254 | R500_TEX_DST_R_SWIZ_R
1255 | R500_TEX_DST_G_SWIZ_G
1256 | R500_TEX_DST_B_SWIZ_B
1257 | R500_TEX_DST_A_SWIZ_A;
1258 fp->inst[0].inst3 = R500_DX_ADDR(0)
1259 | R500_DX_S_SWIZ_R
1260 | R500_DX_T_SWIZ_R
1261 | R500_DX_R_SWIZ_R
1262 | R500_DX_Q_SWIZ_R
1263 | R500_DY_ADDR(0)
1264 | R500_DY_S_SWIZ_R
1265 | R500_DY_T_SWIZ_R
1266 | R500_DY_R_SWIZ_R
1267 | R500_DY_Q_SWIZ_R;
1268 fp->inst[0].inst4 = 0x0;
1269 fp->inst[0].inst5 = 0x0;
1270
1271 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
1272 R500_INST_TEX_SEM_WAIT |
1273 R500_INST_LAST |
1274 R500_INST_RGB_OMASK_R |
1275 R500_INST_RGB_OMASK_G |
1276 R500_INST_RGB_OMASK_B |
1277 R500_INST_ALPHA_OMASK;
1278 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
1279 R500_RGB_ADDR1(0) |
1280 R500_RGB_ADDR1_CONST |
1281 R500_RGB_ADDR2(0) |
1282 R500_RGB_ADDR2_CONST |
1283 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
1284 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
1285 R500_ALPHA_ADDR1(0) |
1286 R500_ALPHA_ADDR1_CONST |
1287 R500_ALPHA_ADDR2(0) |
1288 R500_ALPHA_ADDR2_CONST |
1289 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
1290 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
1291 R500_ALU_RGB_R_SWIZ_A_R |
1292 R500_ALU_RGB_G_SWIZ_A_G |
1293 R500_ALU_RGB_B_SWIZ_A_B |
1294 R500_ALU_RGB_SEL_B_SRC0 |
1295 R500_ALU_RGB_R_SWIZ_B_1 |
1296 R500_ALU_RGB_B_SWIZ_B_1 |
1297 R500_ALU_RGB_G_SWIZ_B_1;
1298 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
1299 R500_ALPHA_SWIZ_A_A |
1300 R500_ALPHA_SWIZ_B_1;
1301 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
1302 R500_ALU_RGBA_R_SWIZ_0 |
1303 R500_ALU_RGBA_G_SWIZ_0 |
1304 R500_ALU_RGBA_B_SWIZ_0 |
1305 R500_ALU_RGBA_A_SWIZ_0;
1306
1307 fp->cs->nrslots = 2;
1308 fp->translated = GL_TRUE;
1309 }
1310
1311 void r500TranslateFragmentShader(r300ContextPtr r300,
1312 struct r500_fragment_program *fp)
1313 {
1314
1315 struct r300_pfs_compile_state *cs = NULL;
1316
1317 if (!fp->translated) {
1318
1319 init_program(r300, fp);
1320 cs = fp->cs;
1321
1322 if (parse_program(fp) == GL_FALSE) {
1323 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1324 dumb_shader(fp);
1325 fp->inst_offset = 0;
1326 fp->inst_end = cs->nrslots - 1;
1327 return;
1328 }
1329 fp->inst_offset = 0;
1330 fp->inst_end = cs->nrslots - 1;
1331
1332 fp->translated = GL_TRUE;
1333 if (RADEON_DEBUG & DEBUG_PIXEL) {
1334 fprintf(stderr, "Mesa program:\n");
1335 fprintf(stderr, "-------------\n");
1336 _mesa_print_program(&fp->mesa_program.Base);
1337 fflush(stdout);
1338 dump_program(fp);
1339 }
1340
1341
1342 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
1343 }
1344
1345 update_params(fp);
1346
1347 }
1348
/**
 * Map a 3-bit swizzle selector to its printable name.
 *
 * \param swiz_val selector value, 0..7.
 * \return a static string, or NULL when \p swiz_val is outside 0..7.
 */
static char *toswiz(int swiz_val)
{
	static char *const swiz_names[8] = {
		"R", "G", "B", "A", "0", "1/2", "1", "U"
	};

	if (swiz_val < 0 || swiz_val > 7)
		return NULL;

	return swiz_names[swiz_val];
}
1362
/**
 * Map the 4-bit RGBA opcode field of an ALU instruction to its mnemonic.
 *
 * \param op_val opcode value; 0..12 have names.
 * \return a static string; "Unknown" for any value without a name, so the
 *         result is always safe to pass to a "%s" conversion.
 */
static char *toop(int op_val)
{
	switch (op_val) {
	case 0: return "MAD";
	case 1: return "DP3";
	case 2: return "DP4";
	case 3: return "D2A";
	case 4: return "MIN";
	case 5: return "MAX";
	case 6: return "Reserved";
	case 7: return "CND";
	case 8: return "CMP";
	case 9: return "FRC";
	case 10: return "SOP";
	case 11: return "MDH";
	case 12: return "MDV";
	default:
		/* The field is four bits wide, so 13..15 can reach us; the
		 * old code returned an uninitialised pointer for them. */
		return "Unknown";
	}
}
1383
/**
 * Map the 4-bit alpha opcode field of an ALU instruction to its mnemonic.
 *
 * \param op_val opcode value, 0..15.
 * \return a static string, or NULL when \p op_val is outside 0..15.
 */
static char *to_alpha_op(int op_val)
{
	static char *const alpha_op_names[16] = {
		"MAD", "DP", "MIN", "MAX",
		"Reserved", "CND", "CMP", "FRC",
		"EX2", "LN2", "RCP", "RSQ",
		"SIN", "COS", "MDH", "MDV"
	};

	if (op_val < 0 || op_val > 15)
		return NULL;

	return alpha_op_names[op_val];
}
1407
/**
 * Turn a 4-bit channel mask into a printable channel list.
 *
 * Bit 0 stands for R, bit 1 for G, bit 2 for B and bit 3 for A; alpha is
 * listed first whenever it is set.
 *
 * \param val mask value, 0..15.
 * \return a static string ("NONE" for 0), or NULL when \p val is outside
 *         0..15.
 */
static char *to_mask(int val)
{
	static char *const mask_names[16] = {
		"NONE", "R", "G", "RG",
		"B", "RB", "GB", "RGB",
		"A", "AR", "AG", "ARG",
		"AB", "ARB", "AGB", "ARGB"
	};

	if (val < 0 || val > 15)
		return NULL;

	return mask_names[val];
}
1431
/**
 * Map the 3-bit texture opcode field of a TEX instruction to its mnemonic.
 *
 * \param val opcode value; 0..6 have names.
 * \return a static string; "Unknown" for any value without a name.  The
 *         field is three bits wide, so 7 can occur, and the result is
 *         printed with "%s", which must never be handed NULL.
 */
static char *to_texop(int val)
{
	static char *const texop_names[7] = {
		"NOP", "LD", "TEXKILL", "PROJ", "LODBIAS", "LOD", "DXDY"
	};

	if (val < 0 || val > 6)
		return "Unknown";

	return texop_names[val];
}
1445
1446 static void dump_program(struct r500_fragment_program *fp)
1447 {
1448 int pc = 0;
1449 int n;
1450 uint32_t inst;
1451 uint32_t inst0;
1452 char *str = NULL;
1453
1454 for (n = 0; n < fp->inst_end+1; n++) {
1455 inst0 = inst = fp->inst[n].inst0;
1456 fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
1457 switch(inst & 0x3) {
1458 case R500_INST_TYPE_ALU: str = "ALU"; break;
1459 case R500_INST_TYPE_OUT: str = "OUT"; break;
1460 case R500_INST_TYPE_FC: str = "FC"; break;
1461 case R500_INST_TYPE_TEX: str = "TEX"; break;
1462 };
1463 fprintf(stderr,"%s %s %s %s %s ", str,
1464 inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
1465 inst & R500_INST_LAST ? "LAST" : "",
1466 inst & R500_INST_NOP ? "NOP" : "",
1467 inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
1468 fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
1469 to_mask((inst >> 15) & 0xf));
1470
1471 switch(inst0 & 0x3) {
1472 case 0:
1473 case 1:
1474 fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1);
1475 inst = fp->inst[n].inst1;
1476
1477 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1478 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1479 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1480 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1481 (inst >> 30));
1482
1483 fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2);
1484 inst = fp->inst[n].inst2;
1485 fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
1486 inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
1487 (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
1488 (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
1489 (inst >> 30));
1490 fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3);
1491 inst = fp->inst[n].inst3;
1492 fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
1493 (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
1494 (inst >> 11) & 0x3,
1495 (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
1496 (inst >> 24) & 0x3);
1497
1498
1499 fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4);
1500 inst = fp->inst[n].inst4;
1501 fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
1502 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1503 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
1504 (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
1505 (inst >> 31) & 0x1);
1506
1507 fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5);
1508 inst = fp->inst[n].inst5;
1509 fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
1510 (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
1511 (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
1512 (inst >> 23) & 0x3,
1513 (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
1514 break;
1515 case 2:
1516 break;
1517 case 3:
1518 inst = fp->inst[n].inst1;
1519 fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
1520 to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
1521 (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
1522 inst = fp->inst[n].inst2;
1523 fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
1524 inst & 127, inst & (1<<7) ? "(rel)" : "",
1525 toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
1526 toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
1527 (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
1528 toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
1529 toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
1530
1531 fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3);
1532 break;
1533 }
1534 fprintf(stderr,"\n");
1535 }
1536
1537 }