Merge commit 'origin/master' into drm-gem
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 * \todo Depth write, WPOS/FOGC inputs
41 *
42 * \todo FogOption
43 *
44 */
45
46 #include "glheader.h"
47 #include "macros.h"
48 #include "enums.h"
49 #include "shader/prog_instruction.h"
50 #include "shader/prog_parameter.h"
51 #include "shader/prog_print.h"
52
53 #include "r300_context.h"
54 #include "r500_fragprog.h"
55 #include "r300_reg.h"
56 #include "r300_state.h"
57
/**
 * Associates one Mesa program register with an R500 hardware temporary.
 */
struct reg_acc {
	/* Hardware temp assigned to the Mesa register */
	int reg;

	/* Number of times the Mesa program uses the register */
	unsigned int refcount;
};
63
/**
 * Lifetime bookkeeping for a single hardware temporary.
 */
struct reg_lifetime {
	/* First slot at which this register is free, i.e. may be chosen as a
	 * new destination register.  Holds -1 while the register is assigned
	 * to a Mesa register whose last access has not been emitted yet. */
	int free;

	/* First slot at which the register is currently reserved.  Stops e.g.
	 * a scalar operation from being moved before the allocation time of
	 * a register that was first allocated for a vector operation. */
	int reserved;

	/* First slot in which the register can be read as a source without
	 * losing the value written by the last emitted instruction that
	 * writes to it. */
	int vector_valid, scalar_valid;

	/* Slot in which the register was last read; this is also the first
	 * slot in which the register may be written again. */
	int vector_lastread, scalar_lastread;
};
91
/*
 * Flag bits for r500_pfs_compile_slot::used.
 */
#define SLOT_SRC_VECTOR 0x00001
#define SLOT_SRC_SCALAR 0x00008
#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR)
#define SLOT_OP_VECTOR 0x10000
#define SLOT_OP_SCALAR 0x20000
#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR)

/**
 * Usage information about one ALU instruction slot, kept while a fragment
 * program is being compiled.
 */
struct r500_pfs_compile_slot {
	/* Bitmask of SLOT_* flags telling which parts of the slot are used */
	unsigned int used;

	/* Selected sources */
	int vsrc[3];
	int ssrc[3];
};
112
113 /**
114 * Store information during compilation of fragment programs.
115 */
116 struct r500_pfs_compile_state {
117 struct r500_fragment_program_compiler *compiler;
118
119 /* number of ALU slots used so far */
120 int nrslots;
121
122 /* Track which (parts of) slots are already filled with instructions */
123 struct r500_pfs_compile_slot slot[PFS_MAX_ALU_INST];
124
125 /* Track the validity of R300 temporaries */
126 struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS];
127
128 /* Used to map Mesa's inputs/temps onto hardware temps */
129 int temp_in_use;
130 struct reg_acc temps[PFS_NUM_TEMP_REGS];
131 struct reg_acc inputs[32]; /* don't actually need 32... */
132
133 /* Track usage of hardware temps, for register allocation,
134 * indirection detection, etc. */
135 GLuint used_in_node;
136 GLuint dest_in_node;
137 };
138
139 /*
140 * Useful macros and values
141 */
/* Print a diagnostic to stderr, prefixed with the current file and function
 * name, and mark the fragment program as failed.  A variable named `cs`
 * must be in scope wherever this macro is used. */
#define ERROR(fmt, ...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
		cs->compiler->fp->error = GL_TRUE; \
	} while (0)
147
/* Declares a local `code` pointer taken from cs->compiler->code; a variable
 * named `cs` must be in scope. */
#define PROG_CODE \
	struct r500_fragment_program_code *code = cs->compiler->code

/* Limits checked by get_temp() and emit_const4fv() */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256

/* "Register" flags */
#define REG_DEST_REL 0x080
#define REG_CONSTANT 0x100
#define REG_SRC_REL 0x200
157
/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three 3-bit selectors packed as a 9-bit RGB swizzle */
#define R500_SWIZ_RGB_ZERO ((R500_SWIZZLE_ZERO << 0) | (R500_SWIZZLE_ZERO << 3) | (R500_SWIZZLE_ZERO << 6))
#define R500_SWIZ_RGB_ONE ((R500_SWIZZLE_ONE << 0) | (R500_SWIZZLE_ONE << 3) | (R500_SWIZZLE_ONE << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
#define R500_SWIZ_MOD_NEG 1
#define R500_SWIZ_MOD_ABS 2
#define R500_SWIZ_MOD_NEG_ABS 3

/*
 * Field placement helpers.  The argument is parenthesized so that an
 * expression such as (a | b) or (s & 0x1ff) is shifted as a whole instead
 * of only its last operand.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
180
/* Writemasks: single-channel bits first, combinations built from them */
#define R500_WRITEMASK_G 0x2
#define R500_WRITEMASK_B 0x4
#define R500_WRITEMASK_A 0x8
#define R500_WRITEMASK_RGB 0x7
#define R500_WRITEMASK_AR (R500_WRITEMASK_A | 0x1)
#define R500_WRITEMASK_AG (R500_WRITEMASK_A | R500_WRITEMASK_G)
#define R500_WRITEMASK_ARG (R500_WRITEMASK_AR | R500_WRITEMASK_G)
#define R500_WRITEMASK_AB (R500_WRITEMASK_A | R500_WRITEMASK_B)
#define R500_WRITEMASK_ARGB (R500_WRITEMASK_A | R500_WRITEMASK_RGB)
191
192 /* 1/(2pi), needed for quick modulus in trig insts
193 * Thanks to glisse for pointing out how to do it! */
194 static const GLfloat RCP_2PI[] = {0.15915494309189535,
195 0.15915494309189535,
196 0.15915494309189535,
197 0.15915494309189535};
198
199 static const GLfloat LIT[] = {127.999999,
200 127.999999,
201 127.999999,
202 -127.999999};
203
204 static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) {
205 GLuint swiz = 0x0;
206 GLuint temp;
207 /* This could be optimized, but it should be plenty fast already. */
208 int i;
209 for (i = 0; i < 3; i++) {
210 temp = GET_SWZ(src.Swizzle, i);
211 /* Fix SWIZZLE_ONE */
212 if (temp == 5) temp++;
213 swiz |= temp << i*3;
214 }
215 if (src.NegateBase)
216 swiz |= (R500_SWIZ_MOD_NEG << 9);
217 return swiz;
218 }
219
220 static INLINE GLuint make_rgba_swizzle(GLuint src) {
221 GLuint swiz = 0x0;
222 GLuint temp;
223 int i;
224 for (i = 0; i < 4; i++) {
225 temp = GET_SWZ(src, i);
226 /* Fix SWIZZLE_ONE */
227 if (temp == 5) temp++;
228 swiz |= temp << i*3;
229 }
230 return swiz;
231 }
232
233 static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
234 GLuint swiz = GET_SWZ(src.Swizzle, 3);
235
236 if (swiz == 5) swiz++;
237
238 if (src.NegateBase)
239 swiz |= (R500_SWIZ_MOD_NEG << 3);
240
241 return swiz;
242 }
243
244 static INLINE GLuint make_sop_swizzle(struct prog_src_register src) {
245 GLuint swiz = GET_SWZ(src.Swizzle, 0);
246
247 if (swiz == 5) swiz++;
248 return swiz;
249 }
250
251 static INLINE GLuint make_strq_swizzle(struct prog_src_register src) {
252 GLuint swiz = 0x0, temp = 0x0;
253 int i;
254 for (i = 0; i < 4; i++) {
255 temp = GET_SWZ(src.Swizzle, i) & 0x3;
256 swiz |= temp << i*2;
257 }
258 return swiz;
259 }
260
261 static int get_temp(struct r500_pfs_compile_state *cs, int slot) {
262
263 PROG_CODE;
264
265 int r = code->temp_reg_offset + cs->temp_in_use + slot;
266
267 if (r > R500_US_NUM_TEMP_REGS) {
268 ERROR("Too many temporary registers requested, can't compile!\n");
269 }
270
271 return r;
272 }
273
274 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
275 static GLuint emit_const4fv(struct r500_pfs_compile_state *cs,
276 const GLfloat * cp)
277 {
278 PROG_CODE;
279
280 GLuint reg = 0x0;
281 int index;
282
283 for (index = 0; index < code->const_nr; ++index) {
284 if (code->constant[index] == cp)
285 break;
286 }
287
288 if (index >= code->const_nr) {
289 if (index >= R500_US_NUM_CONST_REGS) {
290 ERROR("Out of hw constants!\n");
291 return reg;
292 }
293
294 code->const_nr++;
295 code->constant[index] = cp;
296 }
297
298 reg = index | REG_CONSTANT;
299 return reg;
300 }
301
302 static GLuint make_src(struct r500_pfs_compile_state *cs, struct prog_src_register src) {
303 PROG_CODE;
304 GLuint reg;
305 switch (src.File) {
306 case PROGRAM_TEMPORARY:
307 reg = src.Index + code->temp_reg_offset;
308 break;
309 case PROGRAM_INPUT:
310 reg = cs->inputs[src.Index].reg;
311 break;
312 case PROGRAM_LOCAL_PARAM:
313 reg = emit_const4fv(cs,
314 cs->compiler->fp->mesa_program.Base.LocalParams[src.Index]);
315 break;
316 case PROGRAM_ENV_PARAM:
317 reg = emit_const4fv(cs,
318 cs->compiler->compiler.Ctx->FragmentProgram.Parameters[src.Index]);
319 break;
320 case PROGRAM_STATE_VAR:
321 case PROGRAM_NAMED_PARAM:
322 case PROGRAM_CONSTANT:
323 reg = emit_const4fv(cs,
324 cs->compiler->fp->mesa_program.Base.Parameters->ParameterValues[src.Index]);
325 break;
326 case PROGRAM_BUILTIN:
327 reg = 0x0;
328 break;
329 default:
330 ERROR("Can't handle src.File %x\n", src.File);
331 reg = 0x0;
332 break;
333 }
334 return reg;
335 }
336
337 static GLuint make_dest(struct r500_pfs_compile_state *cs, struct prog_dst_register dest) {
338 PROG_CODE;
339 GLuint reg;
340 switch (dest.File) {
341 case PROGRAM_TEMPORARY:
342 reg = dest.Index + code->temp_reg_offset;
343 break;
344 case PROGRAM_OUTPUT:
345 /* Eventually we may need to handle multiple
346 * rendering targets... */
347 reg = dest.Index;
348 break;
349 case PROGRAM_BUILTIN:
350 reg = 0x0;
351 break;
352 default:
353 ERROR("Can't handle dest.File %x\n", dest.File);
354 reg = 0x0;
355 break;
356 }
357 return reg;
358 }
359
360 static void emit_tex(struct r500_pfs_compile_state *cs,
361 struct prog_instruction *fpi, int dest, int counter)
362 {
363 PROG_CODE;
364 int hwsrc, hwdest;
365 GLuint mask;
366
367 mask = fpi->DstReg.WriteMask << 11;
368 hwsrc = make_src(cs, fpi->SrcReg[0]);
369
370 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
371 hwdest = get_temp(cs, 0);
372 } else {
373 hwdest = dest;
374 }
375
376 code->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
377 | R500_INST_TEX_SEM_WAIT;
378
379 code->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
380 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
381
382 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
383 code->inst[counter].inst1 |= R500_TEX_UNSCALED;
384
385 switch (fpi->Opcode) {
386 case OPCODE_KIL:
387 code->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
388 break;
389 case OPCODE_TEX:
390 code->inst[counter].inst1 |= R500_TEX_INST_LD;
391 break;
392 case OPCODE_TXB:
393 code->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
394 break;
395 case OPCODE_TXP:
396 code->inst[counter].inst1 |= R500_TEX_INST_PROJ;
397 break;
398 default:
399 ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode);
400 }
401
402 code->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
403 | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0]))
404 /* | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
405 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A */
406 | R500_TEX_DST_ADDR(hwdest)
407 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
408 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
409
410 code->inst[counter].inst3 = 0x0;
411 code->inst[counter].inst4 = 0x0;
412 code->inst[counter].inst5 = 0x0;
413
414 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
415 counter++;
416 code->inst[counter].inst0 = R500_INST_TYPE_OUT
417 | R500_INST_TEX_SEM_WAIT | (mask << 4);
418 code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
419 code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
420 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
421 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
422 | R500_ALU_RGB_SEL_B_SRC0
423 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB)
424 | R500_ALU_RGB_OMOD_DISABLE;
425 code->inst[counter].inst4 = R500_ALPHA_OP_CMP
426 | R500_ALPHA_ADDRD(dest)
427 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A)
428 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A)
429 | R500_ALPHA_OMOD_DISABLE;
430 code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
431 | R500_ALU_RGBA_ADDRD(dest)
432 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
433 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
434 }
435 }
436
437 static void emit_alu(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi) {
438 PROG_CODE;
439 /* Ideally, we shouldn't have to explicitly clear memory here! */
440 code->inst[counter].inst0 = 0x0;
441 code->inst[counter].inst1 = 0x0;
442 code->inst[counter].inst2 = 0x0;
443 code->inst[counter].inst3 = 0x0;
444 code->inst[counter].inst4 = 0x0;
445 code->inst[counter].inst5 = 0x0;
446
447 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
448 code->inst[counter].inst0 = R500_INST_TYPE_OUT;
449
450 if (fpi->DstReg.Index == FRAG_RESULT_COLR)
451 code->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15);
452
453 if (fpi->DstReg.Index == FRAG_RESULT_DEPR) {
454 code->inst[counter].inst4 |= R500_ALPHA_W_OMASK;
455 /* Notify the state emission! */
456 cs->compiler->fp->writes_depth = GL_TRUE;
457 }
458 } else {
459 code->inst[counter].inst0 = R500_INST_TYPE_ALU
460 /* pixel_mask */
461 | (fpi->DstReg.WriteMask << 11);
462 }
463
464 code->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
465 }
466
467 static void emit_mov(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) {
468 PROG_CODE;
469 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
470 * it is technically more accurate and recommended by ATI/AMD. */
471 emit_alu(cs, counter, fpi);
472 code->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
473 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
474 /* (De)mangle the swizzle from Mesa to R500. */
475 swizzle = make_rgba_swizzle(swizzle);
476 /* 0x1FF is 9 bits, size of an RGB swizzle. */
477 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
478 | MAKE_SWIZ_RGB_A((swizzle & 0x1ff))
479 | R500_ALU_RGB_SEL_B_SRC0
480 | MAKE_SWIZ_RGB_B((swizzle & 0x1ff))
481 | R500_ALU_RGB_OMOD_DISABLE;
482 code->inst[counter].inst4 |= R500_ALPHA_OP_CMP
483 | R500_ALPHA_ADDRD(dest)
484 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3))
485 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3))
486 | R500_ALPHA_OMOD_DISABLE;
487 code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
488 | R500_ALU_RGBA_ADDRD(dest)
489 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
490 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
491 }
492
493 static void emit_mad(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, int one, int two, int three) {
494 PROG_CODE;
495 /* Note: This code was all Corbin's. Corbin is a rather hackish coder.
496 * If you can make it pretty or fast, please do so! */
497 emit_alu(cs, counter, fpi);
498 /* Common MAD stuff */
499 code->inst[counter].inst4 |= R500_ALPHA_OP_MAD
500 | R500_ALPHA_ADDRD(make_dest(cs, fpi->DstReg));
501 code->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD
502 | R500_ALU_RGBA_ADDRD(make_dest(cs, fpi->DstReg));
503 switch (one) {
504 case 0:
505 case 1:
506 case 2:
507 code->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(cs, fpi->SrcReg[one]));
508 code->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(cs, fpi->SrcReg[one]));
509 code->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0
510 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one]));
511 code->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0
512 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one]));
513 break;
514 case R500_SWIZZLE_ZERO:
515 code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO);
516 code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO);
517 break;
518 case R500_SWIZZLE_ONE:
519 code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE);
520 code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE);
521 break;
522 default:
523 ERROR("Bad src index in emit_mad: %d\n", one);
524 break;
525 }
526 switch (two) {
527 case 0:
528 case 1:
529 case 2:
530 code->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(cs, fpi->SrcReg[two]));
531 code->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(cs, fpi->SrcReg[two]));
532 code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
533 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two]));
534 code->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1
535 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two]));
536 break;
537 case R500_SWIZZLE_ZERO:
538 code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
539 code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
540 break;
541 case R500_SWIZZLE_ONE:
542 code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
543 code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
544 break;
545 default:
546 ERROR("Bad src index in emit_mad: %d\n", two);
547 break;
548 }
549 switch (three) {
550 case 0:
551 case 1:
552 case 2:
553 code->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(cs, fpi->SrcReg[three]));
554 code->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(cs, fpi->SrcReg[three]));
555 code->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2
556 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three]))
557 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
558 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three]));
559 break;
560 case R500_SWIZZLE_ZERO:
561 code->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
562 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
563 break;
564 case R500_SWIZZLE_ONE:
565 code->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE)
566 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE);
567 break;
568 default:
569 ERROR("Bad src index in emit_mad: %d\n", three);
570 break;
571 }
572 }
573
574 static void emit_sop(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, int opcode, GLuint src, GLuint swiz, GLuint dest) {
575 PROG_CODE;
576 emit_alu(cs, counter, fpi);
577 code->inst[counter].inst1 = R500_RGB_ADDR0(src);
578 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src);
579 code->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest)
580 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(swiz);
581 code->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
582 | R500_ALU_RGBA_ADDRD(dest);
583 switch (opcode) {
584 case OPCODE_COS:
585 code->inst[counter].inst4 |= R500_ALPHA_OP_COS;
586 break;
587 case OPCODE_EX2:
588 code->inst[counter].inst4 |= R500_ALPHA_OP_EX2;
589 break;
590 case OPCODE_LG2:
591 code->inst[counter].inst4 |= R500_ALPHA_OP_LN2;
592 break;
593 case OPCODE_RCP:
594 code->inst[counter].inst4 |= R500_ALPHA_OP_RCP;
595 break;
596 case OPCODE_RSQ:
597 code->inst[counter].inst4 |= R500_ALPHA_OP_RSQ;
598 break;
599 case OPCODE_SIN:
600 code->inst[counter].inst4 |= R500_ALPHA_OP_SIN;
601 break;
602 default:
603 ERROR("Bad opcode in emit_sop: %d\n", opcode);
604 break;
605 }
606 }
607
608 static int do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi, int counter) {
609 PROG_CODE;
610 GLuint src[3], dest = 0;
611 int temp_swiz = 0;
612
613 if (fpi->Opcode != OPCODE_KIL) {
614 dest = make_dest(cs, fpi->DstReg);
615 }
616
617 switch (fpi->Opcode) {
618 case OPCODE_ABS:
619 emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
620 code->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
621 | R500_ALU_RGB_MOD_B_ABS;
622 code->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
623 | R500_ALPHA_MOD_B_ABS;
624 break;
625 case OPCODE_ADD:
626 /* Variation on MAD: 1*src0+src1 */
627 emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
628 break;
629 case OPCODE_CMP:
630 /* This inst's selects need to be swapped as follows:
631 * 0 -> C ; 1 -> B ; 2 -> A */
632 src[0] = make_src(cs, fpi->SrcReg[0]);
633 src[1] = make_src(cs, fpi->SrcReg[1]);
634 src[2] = make_src(cs, fpi->SrcReg[2]);
635 emit_alu(cs, counter, fpi);
636 code->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
637 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
638 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
639 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
640 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
641 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
642 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
643 code->inst[counter].inst4 |= R500_ALPHA_OP_CMP
644 | R500_ALPHA_ADDRD(dest)
645 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
646 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
647 code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
648 | R500_ALU_RGBA_ADDRD(dest)
649 | R500_ALU_RGBA_SEL_C_SRC2
650 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
651 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
652 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
653 break;
654 case OPCODE_COS:
655 src[0] = make_src(cs, fpi->SrcReg[0]);
656 src[1] = emit_const4fv(cs, RCP_2PI);
657 code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
658 | (R500_WRITEMASK_ARGB << 11);
659 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
660 | R500_RGB_ADDR1(src[1]);
661 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
662 | R500_ALPHA_ADDR1(src[1]);
663 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
664 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
665 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
666 code->inst[counter].inst4 = R500_ALPHA_OP_MAD
667 | R500_ALPHA_ADDRD(get_temp(cs, 0))
668 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
669 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
670 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
671 | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
672 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
673 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
674 counter++;
675 code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
676 code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
677 code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
678 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
679 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
680 code->inst[counter].inst4 = R500_ALPHA_OP_FRC
681 | R500_ALPHA_ADDRD(get_temp(cs, 1))
682 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
683 code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
684 | R500_ALU_RGBA_ADDRD(get_temp(cs, 1));
685 counter++;
686 emit_sop(cs, counter, fpi, OPCODE_COS, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest);
687 break;
688 case OPCODE_DP3:
689 src[0] = make_src(cs, fpi->SrcReg[0]);
690 src[1] = make_src(cs, fpi->SrcReg[1]);
691 emit_alu(cs, counter, fpi);
692 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
693 | R500_RGB_ADDR1(src[1]);
694 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
695 | R500_ALPHA_ADDR1(src[1]);
696 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
697 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
698 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
699 code->inst[counter].inst4 |= R500_ALPHA_OP_DP
700 | R500_ALPHA_ADDRD(dest)
701 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
702 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
703 code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
704 | R500_ALU_RGBA_ADDRD(dest);
705 break;
706 case OPCODE_DP4:
707 src[0] = make_src(cs, fpi->SrcReg[0]);
708 src[1] = make_src(cs, fpi->SrcReg[1]);
709 /* Based on DP3 */
710 emit_alu(cs, counter, fpi);
711 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
712 | R500_RGB_ADDR1(src[1]);
713 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
714 | R500_ALPHA_ADDR1(src[1]);
715 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
716 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
717 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
718 code->inst[counter].inst4 |= R500_ALPHA_OP_DP
719 | R500_ALPHA_ADDRD(dest)
720 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
721 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
722 code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
723 | R500_ALU_RGBA_ADDRD(dest);
724 break;
725 case OPCODE_DPH:
726 src[0] = make_src(cs, fpi->SrcReg[0]);
727 src[1] = make_src(cs, fpi->SrcReg[1]);
728 /* Based on DP3 */
729 emit_alu(cs, counter, fpi);
730 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
731 | R500_RGB_ADDR1(src[1]);
732 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
733 | R500_ALPHA_ADDR1(src[1]);
734 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
735 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
736 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
737 code->inst[counter].inst4 |= R500_ALPHA_OP_DP
738 | R500_ALPHA_ADDRD(dest)
739 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
740 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
741 code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
742 | R500_ALU_RGBA_ADDRD(dest);
743 break;
744 case OPCODE_DST:
745 src[0] = make_src(cs, fpi->SrcReg[0]);
746 src[1] = make_src(cs, fpi->SrcReg[1]);
747 /* [1, src0.y*src1.y, src0.z, src1.w]
748 * So basically MUL with lotsa swizzling. */
749 emit_alu(cs, counter, fpi);
750 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
751 | R500_RGB_ADDR1(src[1]);
752 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
753 | R500_ALPHA_ADDR1(src[1]);
754 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
755 | R500_ALU_RGB_SEL_B_SRC1;
756 /* Select [1, y, z, 1] */
757 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE;
758 code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz);
759 /* Select [1, y, 1, w] */
760 temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6);
761 code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz);
762 code->inst[counter].inst4 |= R500_ALPHA_OP_MAD
763 | R500_ALPHA_ADDRD(dest)
764 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
765 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
766 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
767 | R500_ALU_RGBA_ADDRD(dest)
768 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
769 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
770 break;
771 case OPCODE_EX2:
772 src[0] = make_src(cs, fpi->SrcReg[0]);
773 emit_sop(cs, counter, fpi, OPCODE_EX2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest);
774 break;
775 case OPCODE_FLR:
776 src[0] = make_src(cs, fpi->SrcReg[0]);
777 code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
778 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
779 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
780 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
781 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
782 code->inst[counter].inst4 |= R500_ALPHA_OP_FRC
783 | R500_ALPHA_ADDRD(get_temp(cs, 0))
784 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
785 code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
786 | R500_ALU_RGBA_ADDRD(get_temp(cs, 0));
787 counter++;
788 emit_alu(cs, counter, fpi);
789 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
790 | R500_RGB_ADDR1(get_temp(cs, 0));
791 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
792 | R500_ALPHA_ADDR1(get_temp(cs, 0));
793 code->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
794 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
795 code->inst[counter].inst4 = R500_ALPHA_OP_MAD
796 | R500_ALPHA_ADDRD(dest)
797 | R500_ALPHA_SWIZ_A_A
798 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
799 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
800 | R500_ALU_RGBA_ADDRD(dest)
801 | R500_ALU_RGBA_SEL_C_SRC1
802 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
803 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
804 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]))
805 | R500_ALU_RGBA_MOD_C_NEG;
806 break;
807 case OPCODE_FRC:
808 src[0] = make_src(cs, fpi->SrcReg[0]);
809 emit_alu(cs, counter, fpi);
810 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
811 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
812 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
813 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
814 code->inst[counter].inst4 |= R500_ALPHA_OP_FRC
815 | R500_ALPHA_ADDRD(dest)
816 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
817 code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
818 | R500_ALU_RGBA_ADDRD(dest);
819 break;
820 case OPCODE_LG2:
821 src[0] = make_src(cs, fpi->SrcReg[0]);
822 emit_sop(cs, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest);
823 break;
824 case OPCODE_LIT:
825 src[0] = make_src(cs, fpi->SrcReg[0]);
826 src[1] = emit_const4fv(cs, LIT);
827 /* First inst: MAX temp, input, [0, 0, 0, -128]
828 * Write: RG, A */
829 code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
830 | (R500_WRITEMASK_ARG << 11);
831 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
832 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
833 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
834 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
835 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
836 code->inst[counter].inst4 = R500_ALPHA_OP_MAX
837 | R500_ALPHA_ADDRD(get_temp(cs, 0))
838 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
839 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
840 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
841 | R500_ALU_RGBA_ADDRD(get_temp(cs, 0));
842 counter++;
843 /* Second inst: MIN temp, temp, [x, x, x, 128]
844 * Write: A */
845 code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11);
846 code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)) | R500_RGB_ADDR1(src[1]);
847 code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)) | R500_ALPHA_ADDR1(src[1]);
848 /* code->inst[counter].inst3; */
849 code->inst[counter].inst4 = R500_ALPHA_OP_MAX
850 | R500_ALPHA_ADDRD(dest)
851 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
852 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
853 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
854 | R500_ALU_RGBA_ADDRD(dest);
855 counter++;
856 /* Third-fifth insts: POW temp, temp.y, temp.w
857 * Write: B */
858 emit_sop(cs, counter, fpi, OPCODE_LG2, get_temp(cs, 0), SWIZZLE_Y, get_temp(cs, 1));
859 code->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11);
860 counter++;
861 code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
862 code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 1))
863 | R500_RGB_ADDR1(get_temp(cs, 0));
864 code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 1))
865 | R500_ALPHA_ADDR1(get_temp(cs, 0));
866 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
867 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
868 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
869 code->inst[counter].inst4 = R500_ALPHA_OP_MAD
870 | R500_ALPHA_ADDRD(get_temp(cs, 1))
871 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
872 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
873 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
874 | R500_ALU_RGBA_ADDRD(get_temp(cs, 1))
875 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
876 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
877 counter++;
878 emit_sop(cs, counter, fpi, OPCODE_EX2, get_temp(cs, 1), SWIZZLE_W, get_temp(cs, 0));
879 code->inst[counter].inst0 |= (R500_WRITEMASK_B << 11);
880 counter++;
881 /* Sixth inst: CMP dest, temp.xxxx, temp.[1, x, z, 1], temp.[1, x, 0, 1];
882 * Write: ARGB
883 * This inst's selects need to be swapped as follows:
884 * 0 -> C ; 1 -> B ; 2 -> A */
885 emit_alu(cs, counter, fpi);
886 code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
887 code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
888 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
889 | R500_ALU_RGB_R_SWIZ_A_1
890 | R500_ALU_RGB_G_SWIZ_A_R
891 | R500_ALU_RGB_B_SWIZ_A_B
892 | R500_ALU_RGB_SEL_B_SRC0
893 | R500_ALU_RGB_R_SWIZ_B_1
894 | R500_ALU_RGB_G_SWIZ_B_R
895 | R500_ALU_RGB_B_SWIZ_B_0;
896 code->inst[counter].inst4 |= R500_ALPHA_OP_CMP
897 | R500_ALPHA_ADDRD(dest)
898 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1
899 | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1;
900 code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
901 | R500_ALU_RGBA_ADDRD(dest)
902 | R500_ALU_RGBA_SEL_C_SRC0
903 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
904 | R500_ALU_RGBA_R_SWIZ_R
905 | R500_ALU_RGBA_G_SWIZ_R
906 | R500_ALU_RGBA_B_SWIZ_R
907 | R500_ALU_RGBA_A_SWIZ_R;
908 break;
909 case OPCODE_LRP:
910 /* src0 * src1 + INV(src0) * src2
911 * 1) MUL src0, src1, temp
912 * 2) PRE 1-src0; MAD srcp, src2, temp */
913 src[0] = make_src(cs, fpi->SrcReg[0]);
914 src[1] = make_src(cs, fpi->SrcReg[1]);
915 src[2] = make_src(cs, fpi->SrcReg[2]);
916 code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
917 | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11);
918 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
919 | R500_RGB_ADDR1(src[1]);
920 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
921 | R500_ALPHA_ADDR1(src[1]);
922 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
923 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
924 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
925 code->inst[counter].inst4 = R500_ALPHA_OP_MAD
926 | R500_ALPHA_ADDRD(get_temp(cs, 0))
927 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
928 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
929 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
930 | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
931 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
932 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
933 counter++;
934 emit_alu(cs, counter, fpi);
935 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
936 | R500_RGB_ADDR1(src[2])
937 | R500_RGB_ADDR2(get_temp(cs, 0))
938 | R500_RGB_SRCP_OP_1_MINUS_RGB0;
939 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
940 | R500_ALPHA_ADDR1(src[2])
941 | R500_ALPHA_ADDR2(get_temp(cs, 0))
942 | R500_ALPHA_SRCP_OP_1_MINUS_A0;
943 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP
944 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
945 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
946 code->inst[counter].inst4 |= R500_ALPHA_OP_MAD
947 | R500_ALPHA_ADDRD(dest)
948 | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
949 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
950 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
951 | R500_ALU_RGBA_ADDRD(dest)
952 | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
953 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
954 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
955 break;
956 case OPCODE_MAD:
957 emit_mad(cs, counter, fpi, 0, 1, 2);
958 break;
959 case OPCODE_MAX:
960 src[0] = make_src(cs, fpi->SrcReg[0]);
961 src[1] = make_src(cs, fpi->SrcReg[1]);
962 emit_alu(cs, counter, fpi);
963 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
964 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
965 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
966 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
967 | R500_ALU_RGB_SEL_B_SRC1
968 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
969 code->inst[counter].inst4 |= R500_ALPHA_OP_MAX
970 | R500_ALPHA_ADDRD(dest)
971 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
972 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
973 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
974 | R500_ALU_RGBA_ADDRD(dest);
975 break;
976 case OPCODE_MIN:
977 src[0] = make_src(cs, fpi->SrcReg[0]);
978 src[1] = make_src(cs, fpi->SrcReg[1]);
979 emit_alu(cs, counter, fpi);
980 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
981 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
982 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
983 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
984 | R500_ALU_RGB_SEL_B_SRC1
985 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
986 code->inst[counter].inst4 |= R500_ALPHA_OP_MIN
987 | R500_ALPHA_ADDRD(dest)
988 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
989 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
990 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
991 | R500_ALU_RGBA_ADDRD(dest);
992 break;
993 case OPCODE_MOV:
994 emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
995 break;
996 case OPCODE_MUL:
997 /* Variation on MAD: src0*src1+0 */
998 emit_mad(cs, counter, fpi, 0, 1, R500_SWIZZLE_ZERO);
999 break;
1000 case OPCODE_POW:
1001 /* POW(a,b) = EX2(LN2(a)*b) */
1002 src[0] = make_src(cs, fpi->SrcReg[0]);
1003 src[1] = make_src(cs, fpi->SrcReg[1]);
1004 emit_sop(cs, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), get_temp(cs, 0));
1005 code->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11);
1006 counter++;
1007 code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
1008 code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0))
1009 | R500_RGB_ADDR1(src[1]);
1010 code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0))
1011 | R500_ALPHA_ADDR1(src[1]);
1012 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1013 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
1014 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
1015 code->inst[counter].inst4 = R500_ALPHA_OP_MAD
1016 | R500_ALPHA_ADDRD(get_temp(cs, 1))
1017 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
1018 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
1019 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1020 | R500_ALU_RGBA_ADDRD(get_temp(cs, 1))
1021 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
1022 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
1023 counter++;
1024 emit_sop(cs, counter, fpi, OPCODE_EX2, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest);
1025 break;
1026 case OPCODE_RCP:
1027 src[0] = make_src(cs, fpi->SrcReg[0]);
1028 emit_sop(cs, counter, fpi, OPCODE_RCP, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest);
1029 break;
1030 case OPCODE_RSQ:
1031 src[0] = make_src(cs, fpi->SrcReg[0]);
1032 emit_sop(cs, counter, fpi, OPCODE_RSQ, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest);
1033 break;
1034 case OPCODE_SCS:
1035 src[0] = make_src(cs, fpi->SrcReg[0]);
1036 src[1] = emit_const4fv(cs, RCP_2PI);
1037 code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1038 | (R500_WRITEMASK_ARGB << 11);
1039 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
1040 | R500_RGB_ADDR1(src[1]);
1041 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
1042 | R500_ALPHA_ADDR1(src[1]);
1043 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1044 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
1045 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
1046 code->inst[counter].inst4 = R500_ALPHA_OP_MAD
1047 | R500_ALPHA_ADDRD(get_temp(cs, 0))
1048 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
1049 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
1050 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1051 | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
1052 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
1053 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
1054 counter++;
1055 code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
1056 code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
1057 code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
1058 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1059 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
1060 code->inst[counter].inst4 = R500_ALPHA_OP_FRC
1061 | R500_ALPHA_ADDRD(get_temp(cs, 1))
1062 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
1063 code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
1064 | R500_ALU_RGBA_ADDRD(get_temp(cs, 1));
1065 counter++;
1066 /* Do a cosine, then a sine, masking out the channels we want to protect. */
1067 /* Cosine only goes in R (x) channel. */
1068 fpi->DstReg.WriteMask = 0x1;
1069 emit_sop(cs, counter, fpi, OPCODE_COS, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest);
1070 counter++;
1071 /* Sine only goes in G (y) channel. */
1072 fpi->DstReg.WriteMask = 0x2;
1073 emit_sop(cs, counter, fpi, OPCODE_SIN, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest);
1074 break;
1075 case OPCODE_SGE:
1076 src[0] = make_src(cs, fpi->SrcReg[0]);
1077 src[1] = make_src(cs, fpi->SrcReg[1]);
1078 code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1079 | (R500_WRITEMASK_ARGB << 11);
1080 code->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1081 | R500_RGB_ADDR2(src[1]);
1082 code->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1083 | R500_ALPHA_ADDR2(src[1]);
1084 code->inst[counter].inst3 = /* 1 */
1085 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1086 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1087 code->inst[counter].inst4 = R500_ALPHA_OP_MAD
1088 | R500_ALPHA_ADDRD(get_temp(cs, 0))
1089 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1090 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1091 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1092 | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
1093 | R500_ALU_RGBA_SEL_C_SRC2
1094 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1095 | R500_ALU_RGBA_MOD_C_NEG
1096 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1097 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1098 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1099 counter++;
1100 /* This inst's selects need to be swapped as follows:
1101 * 0 -> C ; 1 -> B ; 2 -> A */
1102 emit_alu(cs, counter, fpi);
1103 code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
1104 code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
1105 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1106 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1107 | R500_ALU_RGB_SEL_B_SRC0
1108 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
1109 code->inst[counter].inst4 |= R500_ALPHA_OP_CMP
1110 | R500_ALPHA_ADDRD(dest)
1111 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1112 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
1113 code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
1114 | R500_ALU_RGBA_ADDRD(dest)
1115 | R500_ALU_RGBA_SEL_C_SRC0
1116 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1117 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1118 | R500_ALU_RGBA_A_SWIZ_A;
1119 break;
1120 case OPCODE_SIN:
1121 src[0] = make_src(cs, fpi->SrcReg[0]);
1122 src[1] = emit_const4fv(cs, RCP_2PI);
1123 code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1124 | (R500_WRITEMASK_ARGB << 11);
1125 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
1126 | R500_RGB_ADDR1(src[1]);
1127 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
1128 | R500_ALPHA_ADDR1(src[1]);
1129 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1130 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
1131 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB);
1132 code->inst[counter].inst4 = R500_ALPHA_OP_MAD
1133 | R500_ALPHA_ADDRD(get_temp(cs, 0))
1134 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A
1135 | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A;
1136 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1137 | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
1138 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
1139 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
1140 counter++;
1141 code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11);
1142 code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
1143 code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
1144 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1145 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB);
1146 code->inst[counter].inst4 = R500_ALPHA_OP_FRC
1147 | R500_ALPHA_ADDRD(get_temp(cs, 1))
1148 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A;
1149 code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
1150 | R500_ALU_RGBA_ADDRD(get_temp(cs, 1));
1151 counter++;
1152 emit_sop(cs, counter, fpi, OPCODE_SIN, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest);
1153 break;
1154 case OPCODE_SLT:
1155 src[0] = make_src(cs, fpi->SrcReg[0]);
1156 src[1] = make_src(cs, fpi->SrcReg[1]);
1157 code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1158 | (R500_WRITEMASK_ARGB << 11);
1159 code->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
1160 | R500_RGB_ADDR2(src[1]);
1161 code->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
1162 | R500_ALPHA_ADDR2(src[1]);
1163 code->inst[counter].inst3 = /* 1 */
1164 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
1165 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
1166 code->inst[counter].inst4 = R500_ALPHA_OP_MAD
1167 | R500_ALPHA_ADDRD(get_temp(cs, 0))
1168 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
1169 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
1170 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1171 | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
1172 | R500_ALU_RGBA_SEL_C_SRC2
1173 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
1174 | R500_ALU_RGBA_MOD_C_NEG
1175 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
1176 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
1177 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
1178 counter++;
1179 /* This inst's selects need to be swapped as follows:
1180 * 0 -> C ; 1 -> B ; 2 -> A */
1181 emit_alu(cs, counter, fpi);
1182 code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0));
1183 code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0));
1184 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1185 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
1186 | R500_ALU_RGB_SEL_B_SRC0
1187 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
1188 code->inst[counter].inst4 |= R500_ALPHA_OP_CMP
1189 | R500_ALPHA_ADDRD(dest)
1190 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
1191 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE);
1192 code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
1193 | R500_ALU_RGBA_ADDRD(dest)
1194 | R500_ALU_RGBA_SEL_C_SRC0
1195 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1196 | R500_ALU_RGBA_ALPHA_SEL_C_SRC0
1197 | R500_ALU_RGBA_A_SWIZ_A;
1198 break;
1199 case OPCODE_SUB:
1200 /* Variation on MAD: 1*src0-src1 */
1201 fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */
1202 emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1);
1203 break;
1204 case OPCODE_SWZ:
1205 /* TODO: The rarer negation masks! */
1206 emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest);
1207 break;
1208 case OPCODE_XPD:
1209 /* src0 * src1 - src1 * src0
1210 * 1) MUL temp.xyz, src0.yzx, src1.zxy
1211 * 2) MAD src0.zxy, src1.yzx, -temp.xyz */
1212 src[0] = make_src(cs, fpi->SrcReg[0]);
1213 src[1] = make_src(cs, fpi->SrcReg[1]);
1214 code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT
1215 | (R500_WRITEMASK_RGB << 11);
1216 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
1217 | R500_RGB_ADDR1(src[1]);
1218 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
1219 | R500_ALPHA_ADDR1(src[1]);
1220 /* Select [y, z, x] */
1221 temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]);
1222 temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6);
1223 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1224 | MAKE_SWIZ_RGB_A(temp_swiz);
1225 /* Select [z, x, y] */
1226 temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]);
1227 temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6);
1228 code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
1229 | MAKE_SWIZ_RGB_B(temp_swiz);
1230 code->inst[counter].inst4 = R500_ALPHA_OP_MAD
1231 | R500_ALPHA_ADDRD(get_temp(cs, 0))
1232 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
1233 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
1234 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1235 | R500_ALU_RGBA_ADDRD(get_temp(cs, 0))
1236 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
1237 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
1238 counter++;
1239 emit_alu(cs, counter, fpi);
1240 code->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
1241 | R500_RGB_ADDR1(src[1])
1242 | R500_RGB_ADDR2(get_temp(cs, 0));
1243 code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
1244 | R500_ALPHA_ADDR1(src[1])
1245 | R500_ALPHA_ADDR2(get_temp(cs, 0));
1246 /* Select [z, x, y] */
1247 temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]);
1248 temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6);
1249 code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
1250 | MAKE_SWIZ_RGB_A(temp_swiz);
1251 /* Select [y, z, x] */
1252 temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]);
1253 temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6);
1254 code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1
1255 | MAKE_SWIZ_RGB_B(temp_swiz);
1256 code->inst[counter].inst4 |= R500_ALPHA_OP_MAD
1257 | R500_ALPHA_ADDRD(dest)
1258 | R500_ALPHA_SWIZ_A_1
1259 | R500_ALPHA_SWIZ_B_1;
1260 code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
1261 | R500_ALU_RGBA_ADDRD(dest)
1262 | R500_ALU_RGBA_SEL_C_SRC2
1263 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB)
1264 | R500_ALU_RGBA_MOD_C_NEG
1265 | R500_ALU_RGBA_A_SWIZ_0;
1266 break;
1267 case OPCODE_KIL:
1268 case OPCODE_TEX:
1269 case OPCODE_TXB:
1270 case OPCODE_TXP:
1271 emit_tex(cs, fpi, dest, counter);
1272 if (fpi->DstReg.File == PROGRAM_OUTPUT)
1273 counter++;
1274 break;
1275 default:
1276 ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode));
1277 break;
1278 }
1279
1280 /* Finishing touches */
1281 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
1282 code->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
1283 }
1284
1285 counter++;
1286
1287 return counter;
1288 }
1289
1290 static GLboolean parse_program(struct r500_pfs_compile_state *cs)
1291 {
1292 PROG_CODE;
1293 int clauseidx, counter = 0;
1294
1295 for (clauseidx = 0; clauseidx < cs->compiler->compiler.NumClauses; clauseidx++) {
1296 struct radeon_clause* clause = &cs->compiler->compiler.Clauses[clauseidx];
1297 struct prog_instruction* fpi;
1298
1299 int ip;
1300
1301 for (ip = 0; ip < clause->NumInstructions; ip++) {
1302 fpi = clause->Instructions + ip;
1303 counter = do_inst(cs, fpi, counter);
1304
1305 if (cs->compiler->fp->error)
1306 return GL_FALSE;
1307 }
1308 }
1309
1310 /* Finish him! (If it's an ALU/OUT instruction...) */
1311 if ((code->inst[counter-1].inst0 & 0x3) == 1) {
1312 code->inst[counter-1].inst0 |= R500_INST_LAST;
1313 } else {
1314 /* We still need to put an output inst, right? */
1315 WARN_ONCE("Final FP instruction is not an OUT.\n");
1316 }
1317
1318 cs->nrslots = counter;
1319
1320 code->max_temp_idx++;
1321
1322 return GL_TRUE;
1323 }
1324
1325 static void init_program(struct r500_pfs_compile_state *cs)
1326 {
1327 PROG_CODE;
1328 struct gl_fragment_program *mp = &cs->compiler->fp->mesa_program;
1329 struct prog_instruction *fpi;
1330 GLuint InputsRead = mp->Base.InputsRead;
1331 GLuint temps_used = 0;
1332 int i, j;
1333
1334 /* New compile, reset tracking data */
1335 cs->compiler->fp->optimization =
1336 driQueryOptioni(&cs->compiler->r300->radeon.optionCache, "fp_optimization");
1337 cs->compiler->fp->translated = GL_FALSE;
1338 cs->compiler->fp->error = GL_FALSE;
1339 code->const_nr = 0;
1340 /* Size of pixel stack, plus 1. */
1341 code->max_temp_idx = 1;
1342 /* Temp register offset. */
1343 code->temp_reg_offset = 0;
1344 /* Whether or not we perform any depth writing. */
1345 cs->compiler->fp->writes_depth = GL_FALSE;
1346
1347 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
1348 for (j = 0; j < 3; j++) {
1349 cs->slot[i].vsrc[j] = SRC_CONST;
1350 cs->slot[i].ssrc[j] = SRC_CONST;
1351 }
1352 }
1353
1354 /* Work out what temps the Mesa inputs correspond to, this must match
1355 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
1356 * configures itself based on the fragprog's InputsRead
1357 *
1358 * NOTE: this depends on get_hw_temp() allocating registers in order,
1359 * starting from register 0, so we're just going to do that instead.
1360 */
1361
1362 /* Texcoords come first */
1363 for (i = 0; i < cs->compiler->fp->ctx->Const.MaxTextureUnits; i++) {
1364 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
1365 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
1366 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
1367 code->temp_reg_offset;
1368 code->temp_reg_offset++;
1369 }
1370 }
1371 InputsRead &= ~FRAG_BITS_TEX_ANY;
1372
1373 /* fragment position treated as a texcoord */
1374 if (InputsRead & FRAG_BIT_WPOS) {
1375 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
1376 cs->inputs[FRAG_ATTRIB_WPOS].reg =
1377 code->temp_reg_offset;
1378 code->temp_reg_offset++;
1379 }
1380 InputsRead &= ~FRAG_BIT_WPOS;
1381
1382 /* Then primary colour */
1383 if (InputsRead & FRAG_BIT_COL0) {
1384 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
1385 cs->inputs[FRAG_ATTRIB_COL0].reg =
1386 code->temp_reg_offset;
1387 code->temp_reg_offset++;
1388 }
1389 InputsRead &= ~FRAG_BIT_COL0;
1390
1391 /* Secondary color */
1392 if (InputsRead & FRAG_BIT_COL1) {
1393 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
1394 cs->inputs[FRAG_ATTRIB_COL1].reg =
1395 code->temp_reg_offset;
1396 code->temp_reg_offset++;
1397 }
1398 InputsRead &= ~FRAG_BIT_COL1;
1399
1400 /* Anything else */
1401 if (InputsRead) {
1402 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
1403 /* force read from hwreg 0 for now */
1404 for (i = 0; i < 32; i++)
1405 if (InputsRead & (1 << i))
1406 cs->inputs[i].reg = 0;
1407 }
1408
1409 int clauseidx;
1410
1411 for (clauseidx = 0; clauseidx < cs->compiler->compiler.NumClauses; ++clauseidx) {
1412 struct radeon_clause* clause = &cs->compiler->compiler.Clauses[clauseidx];
1413 int ip;
1414
1415 for (ip = 0; ip < clause->NumInstructions; ip++) {
1416 fpi = clause->Instructions + ip;
1417 for (i = 0; i < 3; i++) {
1418 if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
1419 if (fpi->SrcReg[i].Index >= temps_used)
1420 temps_used = fpi->SrcReg[i].Index + 1;
1421 }
1422 }
1423 }
1424 }
1425
1426
1427 cs->temp_in_use = temps_used + 1;
1428
1429 code->max_temp_idx = code->temp_reg_offset + cs->temp_in_use;
1430
1431 if (RADEON_DEBUG & DEBUG_PIXEL)
1432 fprintf(stderr, "FP temp indices: code->max_temp_idx: %d cs->temp_in_use: %d\n", code->max_temp_idx, cs->temp_in_use);
1433 }
1434
1435 static void dumb_shader(struct r500_pfs_compile_state *cs)
1436 {
1437 PROG_CODE;
1438 code->inst[0].inst0 = R500_INST_TYPE_TEX
1439 | R500_INST_TEX_SEM_WAIT
1440 | R500_INST_RGB_WMASK_R
1441 | R500_INST_RGB_WMASK_G
1442 | R500_INST_RGB_WMASK_B
1443 | R500_INST_ALPHA_WMASK
1444 | R500_INST_RGB_CLAMP
1445 | R500_INST_ALPHA_CLAMP;
1446 code->inst[0].inst1 = R500_TEX_ID(0)
1447 | R500_TEX_INST_LD
1448 | R500_TEX_SEM_ACQUIRE
1449 | R500_TEX_IGNORE_UNCOVERED;
1450 code->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
1451 | R500_TEX_SRC_S_SWIZ_R
1452 | R500_TEX_SRC_T_SWIZ_G
1453 | R500_TEX_DST_ADDR(0)
1454 | R500_TEX_DST_R_SWIZ_R
1455 | R500_TEX_DST_G_SWIZ_G
1456 | R500_TEX_DST_B_SWIZ_B
1457 | R500_TEX_DST_A_SWIZ_A;
1458 code->inst[0].inst3 = R500_DX_ADDR(0)
1459 | R500_DX_S_SWIZ_R
1460 | R500_DX_T_SWIZ_R
1461 | R500_DX_R_SWIZ_R
1462 | R500_DX_Q_SWIZ_R
1463 | R500_DY_ADDR(0)
1464 | R500_DY_S_SWIZ_R
1465 | R500_DY_T_SWIZ_R
1466 | R500_DY_R_SWIZ_R
1467 | R500_DY_Q_SWIZ_R;
1468 code->inst[0].inst4 = 0x0;
1469 code->inst[0].inst5 = 0x0;
1470
1471 code->inst[1].inst0 = R500_INST_TYPE_OUT |
1472 R500_INST_TEX_SEM_WAIT |
1473 R500_INST_LAST |
1474 R500_INST_RGB_OMASK_R |
1475 R500_INST_RGB_OMASK_G |
1476 R500_INST_RGB_OMASK_B |
1477 R500_INST_ALPHA_OMASK;
1478 code->inst[1].inst1 = R500_RGB_ADDR0(0) |
1479 R500_RGB_ADDR1(0) |
1480 R500_RGB_ADDR1_CONST |
1481 R500_RGB_ADDR2(0) |
1482 R500_RGB_ADDR2_CONST |
1483 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
1484 code->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
1485 R500_ALPHA_ADDR1(0) |
1486 R500_ALPHA_ADDR1_CONST |
1487 R500_ALPHA_ADDR2(0) |
1488 R500_ALPHA_ADDR2_CONST |
1489 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
1490 code->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
1491 R500_ALU_RGB_R_SWIZ_A_R |
1492 R500_ALU_RGB_G_SWIZ_A_G |
1493 R500_ALU_RGB_B_SWIZ_A_B |
1494 R500_ALU_RGB_SEL_B_SRC0 |
1495 R500_ALU_RGB_R_SWIZ_B_1 |
1496 R500_ALU_RGB_B_SWIZ_B_1 |
1497 R500_ALU_RGB_G_SWIZ_B_1;
1498 code->inst[1].inst4 = R500_ALPHA_OP_MAD |
1499 R500_ALPHA_SWIZ_A_A |
1500 R500_ALPHA_SWIZ_B_1;
1501 code->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
1502 R500_ALU_RGBA_R_SWIZ_0 |
1503 R500_ALU_RGBA_G_SWIZ_0 |
1504 R500_ALU_RGBA_B_SWIZ_0 |
1505 R500_ALU_RGBA_A_SWIZ_0;
1506
1507 cs->nrslots = 2;
1508 }
1509
1510 GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler)
1511 {
1512 struct r500_pfs_compile_state cs;
1513 struct r500_fragment_program_code *code = compiler->code;
1514
1515 _mesa_memset(&cs, 0, sizeof(cs));
1516 cs.compiler = compiler;
1517 init_program(&cs);
1518
1519 if (!parse_program(&cs)) {
1520 #if 0
1521 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
1522 dumb_shader(fp);
1523 code->inst_offset = 0;
1524 code->inst_end = cs.nrslots - 1;
1525 #endif
1526 return GL_FALSE;
1527 }
1528
1529 code->inst_offset = 0;
1530 code->inst_end = cs.nrslots - 1;
1531
1532 return GL_TRUE;
1533 }