r5xx: Swap sources for CMP.
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
57 /*
58 * Useful macros and values
59 */
/* Print a diagnostic to stderr, prefixed with the current file and function,
 * and flag the program being compiled as failed.  A variable named `fp` with
 * an `error` member must be in scope wherever this macro is used. */
#define ERROR(fmt, ...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares a local `cs` pointing at the compile state attached to `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* Limits checked by get_temp() and emit_const4fv() respectively. */
#define R500_US_NUM_TEMP_REGS 128
#define R500_US_NUM_CONST_REGS 256
70
71 /* "Register" flags */
/* REG_CONSTANT is ORed into the index returned by emit_const4fv(). */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
/* Single-channel selectors for the constant values 0, 0.5 and 1. */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three packed 3-bit selectors (R at bit 0, G at bit 3, B at bit 6). */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/* The positioning macros below parenthesize their argument so that an
 * expression such as (a | b) is shifted as a whole rather than only its
 * last operand. */

/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
95
96 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
97 GLuint swiz = 0x0;
98 GLuint temp;
99 /* This could be optimized, but it should be plenty fast already. */
100 int i;
101 for (i = 0; i < 3; i++) {
102 temp = (src.Swizzle >> i*3) & 0x7;
103 /* Fix SWIZZLE_ONE */
104 if (temp == 5) temp++;
105 swiz += temp << i*3;
106 }
107 return swiz;
108 }
109
110 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
111 GLuint swiz = (src.Swizzle >> 12) & 0x7;
112 if (swiz == 5) swiz++;
113 return swiz;
114 }
115
116 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
117 GLuint swiz = 0x0;
118 GLuint temp = src.Swizzle;
119 int i;
120 for (i = 0; i < 4; i++) {
121 swiz += (temp & 0x3) << i*2;
122 temp >>= 3;
123 }
124 return swiz;
125 }
126
127 static int get_temp(struct r500_fragment_program *fp, int slot) {
128
129 COMPILE_STATE;
130
131 int r = slot;
132
133 while (cs->inputs[r].refcount != 0) {
134 /* Crap, taken. */
135 r++;
136 }
137
138 fp->temp_reg_offset = r - slot;
139
140 if (r >= R500_US_NUM_TEMP_REGS) {
141 ERROR("Out of hardware temps!\n");
142 return 0;
143 }
144
145 if (r > fp->max_temp_idx)
146 fp->max_temp_idx = r;
147
148 return r;
149 }
150
151 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
152 static GLuint emit_const4fv(struct r500_fragment_program *fp,
153 const GLfloat * cp)
154 {
155 GLuint reg = 0x0;
156 int index;
157
158 for (index = 0; index < fp->const_nr; ++index) {
159 if (fp->constant[index] == cp)
160 break;
161 }
162
163 if (index >= fp->const_nr) {
164 if (index >= R500_US_NUM_CONST_REGS) {
165 ERROR("Out of hw constants!\n");
166 return reg;
167 }
168
169 fp->const_nr++;
170 fp->constant[index] = cp;
171 }
172
173 reg = index | REG_CONSTANT;
174 return reg;
175 }
176
177 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
178 COMPILE_STATE;
179 GLuint reg;
180 switch (src.File) {
181 case PROGRAM_TEMPORARY:
182 reg = src.Index + fp->temp_reg_offset;
183 break;
184 case PROGRAM_INPUT:
185 reg = cs->inputs[src.Index].reg;
186 break;
187 case PROGRAM_STATE_VAR:
188 case PROGRAM_NAMED_PARAM:
189 case PROGRAM_CONSTANT:
190 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
191 ParameterValues[src.Index]);
192 break;
193 default:
194 ERROR("Can't handle src.File %x\n", src.File);
195 reg = 0x0;
196 break;
197 }
198 return reg;
199 }
200
201 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
202 GLuint reg;
203 switch (dest.File) {
204 case PROGRAM_TEMPORARY:
205 reg = dest.Index + fp->temp_reg_offset;
206 break;
207 case PROGRAM_OUTPUT:
208 /* Eventually we may need to handle multiple
209 * rendering targets... */
210 reg = dest.Index;
211 break;
212 default:
213 ERROR("Can't handle dest.File %x\n", dest.File);
214 reg = 0x0;
215 break;
216 }
217 return reg;
218 }
219
220 static void emit_tex(struct r500_fragment_program *fp,
221 struct prog_instruction *fpi, int opcode, int dest, int counter)
222 {
223 int hwsrc, hwdest;
224 GLuint mask;
225
226 mask = fpi->DstReg.WriteMask << 11;
227 hwsrc = make_src(fp, fpi->SrcReg[0]);
228
229 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
230 | R500_INST_TEX_SEM_WAIT;
231
232 fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit)
233 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
234
235 if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX)
236 fp->inst[counter].inst1 |= R500_TEX_UNSCALED;
237
238 switch (opcode) {
239 case OPCODE_KIL:
240 fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL;
241 break;
242 case OPCODE_TEX:
243 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
244 break;
245 case OPCODE_TXB:
246 fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS;
247 break;
248 case OPCODE_TXP:
249 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
250 break;
251 default:
252 ERROR("emit_tex can't handle opcode %x\n", opcode);
253 }
254
255 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
256 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
257 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
258 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
259 | R500_TEX_DST_ADDR(dest)
260 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
261 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
262
263
264
265 fp->inst[counter].inst3 = 0x0;
266 fp->inst[counter].inst4 = 0x0;
267 fp->inst[counter].inst5 = 0x0;
268 }
269
270 static void dumb_shader(struct r500_fragment_program *fp)
271 {
272 fp->inst[0].inst0 = R500_INST_TYPE_TEX
273 | R500_INST_TEX_SEM_WAIT
274 | R500_INST_RGB_WMASK_R
275 | R500_INST_RGB_WMASK_G
276 | R500_INST_RGB_WMASK_B
277 | R500_INST_ALPHA_WMASK
278 | R500_INST_RGB_CLAMP
279 | R500_INST_ALPHA_CLAMP;
280 fp->inst[0].inst1 = R500_TEX_ID(0)
281 | R500_TEX_INST_LD
282 | R500_TEX_SEM_ACQUIRE
283 | R500_TEX_IGNORE_UNCOVERED;
284 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
285 | R500_TEX_SRC_S_SWIZ_R
286 | R500_TEX_SRC_T_SWIZ_G
287 | R500_TEX_DST_ADDR(0)
288 | R500_TEX_DST_R_SWIZ_R
289 | R500_TEX_DST_G_SWIZ_G
290 | R500_TEX_DST_B_SWIZ_B
291 | R500_TEX_DST_A_SWIZ_A;
292 fp->inst[0].inst3 = R500_DX_ADDR(0)
293 | R500_DX_S_SWIZ_R
294 | R500_DX_T_SWIZ_R
295 | R500_DX_R_SWIZ_R
296 | R500_DX_Q_SWIZ_R
297 | R500_DY_ADDR(0)
298 | R500_DY_S_SWIZ_R
299 | R500_DY_T_SWIZ_R
300 | R500_DY_R_SWIZ_R
301 | R500_DY_Q_SWIZ_R;
302 fp->inst[0].inst4 = 0x0;
303 fp->inst[0].inst5 = 0x0;
304
305 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
306 R500_INST_TEX_SEM_WAIT |
307 R500_INST_LAST |
308 R500_INST_RGB_OMASK_R |
309 R500_INST_RGB_OMASK_G |
310 R500_INST_RGB_OMASK_B |
311 R500_INST_ALPHA_OMASK;
312 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
313 R500_RGB_ADDR1(0) |
314 R500_RGB_ADDR1_CONST |
315 R500_RGB_ADDR2(0) |
316 R500_RGB_ADDR2_CONST |
317 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
318 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
319 R500_ALPHA_ADDR1(0) |
320 R500_ALPHA_ADDR1_CONST |
321 R500_ALPHA_ADDR2(0) |
322 R500_ALPHA_ADDR2_CONST |
323 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
324 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
325 R500_ALU_RGB_R_SWIZ_A_R |
326 R500_ALU_RGB_G_SWIZ_A_G |
327 R500_ALU_RGB_B_SWIZ_A_B |
328 R500_ALU_RGB_SEL_B_SRC0 |
329 R500_ALU_RGB_R_SWIZ_B_1 |
330 R500_ALU_RGB_B_SWIZ_B_1 |
331 R500_ALU_RGB_G_SWIZ_B_1;
332 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
333 R500_ALPHA_SWIZ_A_A |
334 R500_ALPHA_SWIZ_B_1;
335 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
336 R500_ALU_RGBA_R_SWIZ_0 |
337 R500_ALU_RGBA_G_SWIZ_0 |
338 R500_ALU_RGBA_B_SWIZ_0 |
339 R500_ALU_RGBA_A_SWIZ_0;
340
341 fp->cs->nrslots = 2;
342 fp->translated = GL_TRUE;
343 }
344
345 static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) {
346 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
347 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
348 /* output_mask */
349 | (fpi->DstReg.WriteMask << 14);
350 } else {
351 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
352 /* pixel_mask */
353 | (fpi->DstReg.WriteMask << 11);
354 }
355
356 fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT;
357 }
358
359 static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_src_register src, GLuint dest) {
360 /* The r3xx shader uses MAD to implement MOV. We are using CMP, since
361 * it is technically more accurate and recommended by ATI/AMD. */
362 GLuint src_reg = make_src(fp, src);
363 fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg);
364 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg);
365 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
366 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(src))
367 | R500_ALU_RGB_SEL_B_SRC0
368 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(src))
369 | R500_ALU_RGB_OMOD_DISABLE;
370 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
371 | R500_ALPHA_ADDRD(dest)
372 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(src))
373 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(src))
374 | R500_ALPHA_OMOD_DISABLE;
375 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
376 | R500_ALU_RGBA_ADDRD(dest)
377 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
378 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
379 }
380
381 static GLboolean parse_program(struct r500_fragment_program *fp)
382 {
383 struct gl_fragment_program *mp = &fp->mesa_program;
384 const struct prog_instruction *inst = mp->Base.Instructions;
385 struct prog_instruction *fpi;
386 GLuint src[3], dest, temp[2];
387 int flags, pixel_mask = 0, output_mask = 0, counter = 0;
388
389 if (!inst || inst[0].Opcode == OPCODE_END) {
390 ERROR("The program is empty!\n");
391 return GL_FALSE;
392 }
393
394 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
395
396 if (fpi->Opcode != OPCODE_KIL) {
397 dest = make_dest(fp, fpi->DstReg);
398
399 pixel_mask = fpi->DstReg.WriteMask << 11;
400 output_mask = fpi->DstReg.WriteMask << 14;
401 }
402
403 switch (fpi->Opcode) {
404 case OPCODE_ABS:
405 emit_alu(fp, counter, fpi);
406 emit_mov(fp, counter, fpi->SrcReg[0], dest);
407 fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS
408 | R500_ALU_RGB_MOD_B_ABS;
409 fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS
410 | R500_ALPHA_MOD_B_ABS;
411 break;
412 case OPCODE_ADD:
413 src[0] = make_src(fp, fpi->SrcReg[0]);
414 src[1] = make_src(fp, fpi->SrcReg[1]);
415 /* Variation on MAD: 1*src0+src1 */
416 emit_alu(fp, counter, fpi);
417 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
418 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
419 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
420 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
421 fp->inst[counter].inst3 = /* 1 */
422 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
423 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
424 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
425 | R500_ALPHA_ADDRD(dest)
426 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
427 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
428 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
429 | R500_ALU_RGBA_ADDRD(dest)
430 | R500_ALU_RGBA_SEL_C_SRC1
431 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
432 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
433 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
434 break;
435 case OPCODE_CMP:
436 /* This inst's selects need to be swapped as follows:
437 * 0 -> C ; 1 -> B ; 2 -> A */
438 src[0] = make_src(fp, fpi->SrcReg[0]);
439 src[1] = make_src(fp, fpi->SrcReg[1]);
440 src[2] = make_src(fp, fpi->SrcReg[2]);
441 emit_alu(fp, counter, fpi);
442 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2])
443 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]);
444 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2])
445 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]);
446 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
447 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2]))
448 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
449 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
450 | R500_ALPHA_ADDRD(dest)
451 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2]))
452 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
453 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
454 | R500_ALU_RGBA_ADDRD(dest)
455 | R500_ALU_RGBA_SEL_C_SRC2
456 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0]))
457 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
458 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0]));
459 break;
460 case OPCODE_COS:
461 src[0] = make_src(fp, fpi->SrcReg[0]);
462 emit_alu(fp, counter, fpi);
463 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
464 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
465 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
466 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
467 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
468 | R500_ALPHA_ADDRD(dest)
469 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
470 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
471 | R500_ALU_RGBA_ADDRD(dest);
472 break;
473 case OPCODE_DP3:
474 src[0] = make_src(fp, fpi->SrcReg[0]);
475 src[1] = make_src(fp, fpi->SrcReg[1]);
476 emit_alu(fp, counter, fpi);
477 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
478 | R500_RGB_ADDR1(src[1]);
479 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
480 | R500_ALPHA_ADDR1(src[1]);
481 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
482 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
483 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
484 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
485 | R500_ALPHA_ADDRD(dest)
486 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
487 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
488 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
489 | R500_ALU_RGBA_ADDRD(dest);
490 break;
491 case OPCODE_DP4:
492 src[0] = make_src(fp, fpi->SrcReg[0]);
493 src[1] = make_src(fp, fpi->SrcReg[1]);
494 /* Based on DP3 */
495 emit_alu(fp, counter, fpi);
496 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
497 | R500_RGB_ADDR1(src[1]);
498 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
499 | R500_ALPHA_ADDR1(src[1]);
500 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
501 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
502 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
503 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
504 | R500_ALPHA_ADDRD(dest)
505 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
506 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
507 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
508 | R500_ALU_RGBA_ADDRD(dest);
509 break;
510 case OPCODE_DPH:
511 src[0] = make_src(fp, fpi->SrcReg[0]);
512 src[1] = make_src(fp, fpi->SrcReg[1]);
513 /* Based on DP3 */
514 emit_alu(fp, counter, fpi);
515 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
516 | R500_RGB_ADDR1(src[1]);
517 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
518 | R500_ALPHA_ADDR1(src[1]);
519 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
520 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
521 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
522 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
523 | R500_ALPHA_ADDRD(dest)
524 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
525 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
526 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
527 | R500_ALU_RGBA_ADDRD(dest);
528 break;
529 case OPCODE_EX2:
530 src[0] = make_src(fp, fpi->SrcReg[0]);
531 emit_alu(fp, counter, fpi);
532 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
533 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
534 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
535 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
536 fp->inst[counter].inst4 = R500_ALPHA_OP_EX2
537 | R500_ALPHA_ADDRD(dest)
538 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
539 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
540 | R500_ALU_RGBA_ADDRD(dest);
541 break;
542 case OPCODE_FRC:
543 src[0] = make_src(fp, fpi->SrcReg[0]);
544 emit_alu(fp, counter, fpi);
545 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
546 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
547 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
548 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
549 fp->inst[counter].inst4 = R500_ALPHA_OP_FRC
550 | R500_ALPHA_ADDRD(dest)
551 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
552 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC
553 | R500_ALU_RGBA_ADDRD(dest);
554 break;
555 case OPCODE_KIL:
556 emit_tex(fp, fpi, OPCODE_KIL, dest, counter);
557 break;
558 case OPCODE_LG2:
559 src[0] = make_src(fp, fpi->SrcReg[0]);
560 emit_alu(fp, counter, fpi);
561 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
562 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
563 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
564 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
565 fp->inst[counter].inst4 = R500_ALPHA_OP_LN2
566 | R500_ALPHA_ADDRD(dest)
567 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
568 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
569 | R500_ALU_RGBA_ADDRD(dest);
570 break;
571 case OPCODE_MAD:
572 src[0] = make_src(fp, fpi->SrcReg[0]);
573 src[1] = make_src(fp, fpi->SrcReg[1]);
574 src[2] = make_src(fp, fpi->SrcReg[2]);
575 emit_alu(fp, counter, fpi);
576 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
577 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
578 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
579 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
580 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
581 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
582 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
583 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
584 | R500_ALPHA_ADDRD(dest)
585 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
586 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
587 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
588 | R500_ALU_RGBA_ADDRD(dest)
589 | R500_ALU_RGBA_SEL_C_SRC2
590 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
591 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
592 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
593 break;
594 case OPCODE_MAX:
595 src[0] = make_src(fp, fpi->SrcReg[0]);
596 src[1] = make_src(fp, fpi->SrcReg[1]);
597 emit_alu(fp, counter, fpi);
598 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
599 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
600 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
601 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
602 | R500_ALU_RGB_SEL_B_SRC1
603 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
604 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
605 | R500_ALPHA_ADDRD(dest)
606 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
607 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
608 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
609 | R500_ALU_RGBA_ADDRD(dest);
610 break;
611 case OPCODE_MIN:
612 src[0] = make_src(fp, fpi->SrcReg[0]);
613 src[1] = make_src(fp, fpi->SrcReg[1]);
614 emit_alu(fp, counter, fpi);
615 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
616 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
617 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
618 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
619 | R500_ALU_RGB_SEL_B_SRC1
620 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
621 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
622 | R500_ALPHA_ADDRD(dest)
623 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
624 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
625 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
626 | R500_ALU_RGBA_ADDRD(dest);
627 break;
628 case OPCODE_MOV:
629 emit_alu(fp, counter, fpi);
630 emit_mov(fp, counter, fpi->SrcReg[0], dest);
631 break;
632 case OPCODE_MUL:
633 src[0] = make_src(fp, fpi->SrcReg[0]);
634 src[1] = make_src(fp, fpi->SrcReg[1]);
635 /* Variation on MAD: src0*src1+0 */
636 emit_alu(fp, counter, fpi);
637 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
638 | R500_RGB_ADDR1(src[1]);
639 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
640 | R500_ALPHA_ADDR1(src[1]);
641 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
642 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
643 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
644 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
645 | R500_ALPHA_ADDRD(dest)
646 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
647 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
648 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
649 | R500_ALU_RGBA_ADDRD(dest)
650 // | R500_ALU_RGBA_SEL_C_SRC2
651 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
652 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
653 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
654 break;
655 case OPCODE_RCP:
656 src[0] = make_src(fp, fpi->SrcReg[0]);
657 emit_alu(fp, counter, fpi);
658 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
659 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
660 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
661 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
662 fp->inst[counter].inst4 = R500_ALPHA_OP_RCP
663 | R500_ALPHA_ADDRD(dest)
664 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
665 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
666 | R500_ALU_RGBA_ADDRD(dest);
667 break;
668 case OPCODE_RSQ:
669 src[0] = make_src(fp, fpi->SrcReg[0]);
670 emit_alu(fp, counter, fpi);
671 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
672 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
673 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
674 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
675 fp->inst[counter].inst4 = R500_ALPHA_OP_RSQ
676 | R500_ALPHA_ADDRD(dest)
677 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
678 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
679 | R500_ALU_RGBA_ADDRD(dest);
680 break;
681 case OPCODE_SCS:
682 /* TODO: Make this elegant! */
683 /* Do a cosine, then a sine, masking out the channels we want to protect. */
684 src[0] = make_src(fp, fpi->SrcReg[0]);
685 /* Cosine only goes in R (x) channel. */
686 fpi->DstReg.WriteMask = 0x1;
687 emit_alu(fp, counter, fpi);
688 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
689 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
690 | R500_INST_TEX_SEM_WAIT | 0x1 << 14;
691 } else {
692 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
693 | R500_INST_TEX_SEM_WAIT | 0x1 << 11;
694 }
695 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
696 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
697 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
698 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
699 fp->inst[counter].inst4 = R500_ALPHA_OP_COS
700 | R500_ALPHA_ADDRD(dest)
701 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
702 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
703 | R500_ALU_RGBA_ADDRD(dest);
704 counter++;
705 /* Sine only goes in G (y) channel. */
706 fpi->DstReg.WriteMask = 0x2;
707 emit_alu(fp, counter, fpi);
708 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
709 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
710 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
711 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
712 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
713 | R500_ALPHA_ADDRD(dest)
714 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
715 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
716 | R500_ALU_RGBA_ADDRD(dest);
717 counter++;
718 /* Put 0 into B,A (z,w) channels. */
719 fpi->DstReg.WriteMask = 0xC;
720 emit_alu(fp, counter, fpi);
721 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
722 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
723 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
724 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO)
725 | R500_ALU_RGB_SEL_B_SRC0
726 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO);
727 fp->inst[counter].inst4 = R500_ALPHA_OP_CMP
728 | R500_ALPHA_ADDRD(dest)
729 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO)
730 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO);
731 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP
732 | R500_ALU_RGBA_ADDRD(dest)
733 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
734 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
735 break;
736 case OPCODE_SIN:
737 src[0] = make_src(fp, fpi->SrcReg[0]);
738 emit_alu(fp, counter, fpi);
739 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
740 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
741 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
742 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]));
743 fp->inst[counter].inst4 = R500_ALPHA_OP_SIN
744 | R500_ALPHA_ADDRD(dest)
745 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]));
746 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP
747 | R500_ALU_RGBA_ADDRD(dest);
748 break;
749 case OPCODE_SUB:
750 src[0] = make_src(fp, fpi->SrcReg[0]);
751 src[1] = make_src(fp, fpi->SrcReg[1]);
752 /* Variation on MAD: 1*src0-src1 */
753 emit_alu(fp, counter, fpi);
754 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
755 | R500_RGB_ADDR2(src[1]);
756 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
757 | R500_ALPHA_ADDR2(src[1]);
758 fp->inst[counter].inst3 = /* 1 */
759 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
760 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
761 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
762 | R500_ALPHA_ADDRD(dest)
763 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
764 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
765 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
766 | R500_ALU_RGBA_ADDRD(dest)
767 | R500_ALU_RGBA_SEL_C_SRC2
768 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
769 | R500_ALU_RGBA_MOD_C_NEG
770 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
771 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
772 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
773 break;
774 case OPCODE_SWZ:
775 /* TODO: Negation masks! */
776 emit_alu(fp, counter, fpi);
777 emit_mov(fp, counter, fpi->SrcReg[0], dest);
778 break;
779 case OPCODE_TEX:
780 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
781 break;
782 case OPCODE_TXB:
783 emit_tex(fp, fpi, OPCODE_TXB, dest, counter);
784 break;
785 case OPCODE_TXP:
786 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
787 break;
788 default:
789 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
790 break;
791 }
792
793 /* Finishing touches */
794 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
795 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
796 }
797
798 counter++;
799
800 if (fp->error)
801 return GL_FALSE;
802
803 }
804
805 /* Finish him! (If it's an ALU/OUT instruction...) */
806 if ((fp->inst[counter-1].inst0 & 0x3) == 1) {
807 fp->inst[counter-1].inst0 |= R500_INST_LAST;
808 } else {
809 /* We still need to put an output inst, right? */
810 WARN_ONCE("Final FP instruction is not an OUT.\n");
811 #if 0
812 fp->inst[counter].inst0 = R500_INST_TYPE_OUT
813 | R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
814 output_mask;
815 fp->inst[counter].inst1 = R500_RGB_ADDR0(dest);
816 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest);
817 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
818 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
819 | R500_ALU_RGB_SEL_B_SRC0
820 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
821 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
822 | R500_ALPHA_ADDRD(0)
823 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0
824 | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1;
825 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
826 | R500_ALU_RGBA_ADDRD(0)
827 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
828 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
829 counter++;
830 #endif
831 }
832
833 fp->cs->nrslots = counter;
834
835 fp->max_temp_idx++;
836
837 return GL_TRUE;
838 }
839
840 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
841 {
842 struct r300_pfs_compile_state *cs = NULL;
843 struct gl_fragment_program *mp = &fp->mesa_program;
844 struct prog_instruction *fpi;
845 GLuint InputsRead = mp->Base.InputsRead;
846 GLuint temps_used = 0; /* for fp->temps[] */
847 int i, j;
848
849 /* New compile, reset tracking data */
850 fp->optimization =
851 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
852 fp->translated = GL_FALSE;
853 fp->error = GL_FALSE;
854 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
855 fp->cur_node = 0;
856 fp->first_node_has_tex = 0;
857 fp->const_nr = 0;
858 /* Size of pixel stack, plus 1. */
859 fp->max_temp_idx = 1;
860 /* Temp register offset. */
861 fp->temp_reg_offset = 0;
862 fp->node[0].alu_end = -1;
863 fp->node[0].tex_end = -1;
864
865 _mesa_memset(cs, 0, sizeof(*fp->cs));
866 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
867 for (j = 0; j < 3; j++) {
868 cs->slot[i].vsrc[j] = SRC_CONST;
869 cs->slot[i].ssrc[j] = SRC_CONST;
870 }
871 }
872
873 /* Work out what temps the Mesa inputs correspond to, this must match
874 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
875 * configures itself based on the fragprog's InputsRead
876 *
877 * NOTE: this depends on get_hw_temp() allocating registers in order,
878 * starting from register 0, so we're just going to do that instead.
879 */
880
881 /* Texcoords come first */
882 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
883 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
884 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
885 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
886 fp->temp_reg_offset;
887 fp->temp_reg_offset++;
888 }
889 }
890 InputsRead &= ~FRAG_BITS_TEX_ANY;
891
892 /* fragment position treated as a texcoord */
893 if (InputsRead & FRAG_BIT_WPOS) {
894 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
895 cs->inputs[FRAG_ATTRIB_WPOS].reg =
896 fp->temp_reg_offset;
897 fp->temp_reg_offset++;
898 }
899 InputsRead &= ~FRAG_BIT_WPOS;
900
901 /* Then primary colour */
902 if (InputsRead & FRAG_BIT_COL0) {
903 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
904 cs->inputs[FRAG_ATTRIB_COL0].reg =
905 fp->temp_reg_offset;
906 fp->temp_reg_offset++;
907 }
908 InputsRead &= ~FRAG_BIT_COL0;
909
910 /* Secondary color */
911 if (InputsRead & FRAG_BIT_COL1) {
912 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
913 cs->inputs[FRAG_ATTRIB_COL1].reg =
914 fp->temp_reg_offset;
915 fp->temp_reg_offset++;
916 }
917 InputsRead &= ~FRAG_BIT_COL1;
918
919 /* Anything else */
920 if (InputsRead) {
921 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
922 /* force read from hwreg 0 for now */
923 for (i = 0; i < 32; i++)
924 if (InputsRead & (1 << i))
925 cs->inputs[i].reg = 0;
926 }
927
928 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
929 * That way, we can free up the reg when it's no longer needed
930 */
931 if (!mp->Base.Instructions) {
932 ERROR("No instructions found in program, going to go die now.\n");
933 return;
934 }
935
936 #if 0
937 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
938 int idx;
939 for (i = 0; i < 3; i++) {
940 idx = fpi->SrcReg[i].Index;
941 if (fpi->SrcReg[i].File == PROGRAM_INPUT) {
942 cs->inputs[idx].refcount++;
943 if (fp->max_temp_idx < idx)
944 fp->max_temp_idx = idx;
945 }
946 }
947 }
948 #endif
949
950 fp->max_temp_idx = fp->temp_reg_offset + 1;
951
952 cs->temp_in_use = temps_used;
953 }
954
955 static void update_params(struct r500_fragment_program *fp)
956 {
957 struct gl_fragment_program *mp = &fp->mesa_program;
958
959 /* Ask Mesa nicely to fill in ParameterValues for us */
960 if (mp->Base.Parameters)
961 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
962 }
963
964 void r500TranslateFragmentShader(r300ContextPtr r300,
965 struct r500_fragment_program *fp)
966 {
967
968 struct r300_pfs_compile_state *cs = NULL;
969
970 if (!fp->translated) {
971
972 /* I need to see what I'm working with! */
973 fprintf(stderr, "Mesa program:\n");
974 fprintf(stderr, "-------------\n");
975 _mesa_print_program(&fp->mesa_program.Base);
976 fflush(stdout);
977
978 init_program(r300, fp);
979 cs = fp->cs;
980
981 if (parse_program(fp) == GL_FALSE) {
982 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
983 dumb_shader(fp);
984 fp->inst_offset = 0;
985 fp->inst_end = cs->nrslots - 1;
986 return;
987 }
988 fp->inst_offset = 0;
989 fp->inst_end = cs->nrslots - 1;
990
991 fp->translated = GL_TRUE;
992 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
993 }
994
995 update_params(fp);
996 }