r5xx: We update max_temp_idx now, so no need to hard-code it.
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
57 /*
58 * Useful macros and values
59 */
/*
 * Print a translation error to stderr (prefixed with file and function)
 * and flag the program as failed. Expects a variable named `fp` to be in
 * scope wherever the macro is expanded.
 */
#define ERROR(fmt, args...) do {			\
		fprintf(stderr, "%s::%s(): " fmt "\n",	\
			__FILE__, __FUNCTION__, ##args);	\
		fp->error = GL_TRUE;			\
	} while(0)

/* Declares a local `cs` alias for the compile state; also needs `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three 3-bit selectors packed R, G, B from the low bits upwards. */
#define R500_SWIZ_RGB_ZERO ((R500_SWIZZLE_ZERO << 0) | (R500_SWIZZLE_ZERO << 3) | (R500_SWIZZLE_ZERO << 6))
#define R500_SWIZ_RGB_ONE ((R500_SWIZZLE_ONE << 0) | (R500_SWIZZLE_ONE << 3) | (R500_SWIZZLE_ONE << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/*
 * Field positioning helpers. The argument is parenthesized so that
 * compound expressions such as (a | b) are shifted as a whole.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
92
93 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
94 GLuint swiz = 0x0;
95 GLuint temp;
96 /* This could be optimized, but it should be plenty fast already. */
97 int i;
98 for (i = 0; i < 3; i++) {
99 temp = (src.Swizzle >> i*3) & 0x7;
100 /* Fix SWIZZLE_ONE */
101 if (temp == 5) temp++;
102 swiz += temp << i*3;
103 }
104 return swiz;
105 }
106
107 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
108 GLuint swiz = (src.Swizzle >> 12) & 0x7;
109 if (swiz == 5) swiz++;
110 return swiz;
111 }
112
113 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
114 GLuint swiz = 0x0;
115 GLuint temp = src.Swizzle;
116 int i;
117 for (i = 0; i < 4; i++) {
118 swiz += (temp & 0x3) << i*2;
119 temp >>= 3;
120 }
121 return swiz;
122 }
123
124 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
125 static GLuint emit_const4fv(struct r500_fragment_program *fp,
126 const GLfloat * cp)
127 {
128 GLuint reg = 0x0;
129 int index;
130
131 for (index = 0; index < fp->const_nr; ++index) {
132 if (fp->constant[index] == cp)
133 break;
134 }
135
136 if (index >= fp->const_nr) {
137 /* TODO: This should be r5xx nums, not r300 */
138 if (index >= PFS_NUM_CONST_REGS) {
139 ERROR("Out of hw constants!\n");
140 return reg;
141 }
142
143 fp->const_nr++;
144 fp->constant[index] = cp;
145 }
146
147 reg = index | REG_CONSTANT;
148 return reg;
149 }
150
151 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
152 GLuint reg;
153 switch (src.File) {
154 case PROGRAM_TEMPORARY:
155 // reg = (src.Index << 0x1) | 0x1;
156 reg = src.Index;
157 if (src.Index > fp->max_temp_idx)
158 fp->max_temp_idx = src.Index;
159 break;
160 case PROGRAM_INPUT:
161 /* Ugly hack needed to work around Mesa;
162 * fragments don't get loaded right otherwise! */
163 reg = 0x0;
164 break;
165 case PROGRAM_STATE_VAR:
166 case PROGRAM_NAMED_PARAM:
167 case PROGRAM_CONSTANT:
168 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
169 ParameterValues[src.Index]);
170 break;
171 default:
172 ERROR("Can't handle src.File %x\n", src.File);
173 reg = 0x0;
174 break;
175 }
176 return reg;
177 }
178
179 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
180 GLuint reg;
181 switch (dest.File) {
182 case PROGRAM_TEMPORARY:
183 // reg = (dest.Index << 0x1) | 0x1;
184 reg = dest.Index;
185 if (dest.Index > fp->max_temp_idx)
186 fp->max_temp_idx = dest.Index;
187 break;
188 case PROGRAM_OUTPUT:
189 /* Eventually we may need to handle multiple
190 * rendering targets... */
191 reg = dest.Index;
192 break;
193 default:
194 ERROR("Can't handle dest.File %x\n", dest.File);
195 reg = 0x0;
196 break;
197 }
198 return reg;
199 }
200
201 static void emit_tex(struct r500_fragment_program *fp,
202 struct prog_instruction *fpi, int opcode, int dest, int counter)
203 {
204 int hwsrc, hwdest;
205 GLuint mask;
206
207 mask = fpi->DstReg.WriteMask << 11;
208 hwsrc = make_src(fp, fpi->SrcReg[0]);
209
210 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
211 | R500_INST_TEX_SEM_WAIT;
212
213 fp->inst[counter].inst1 = fpi->TexSrcUnit
214 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
215 switch (opcode) {
216 case OPCODE_TEX:
217 fp->inst[counter].inst1 |= R500_TEX_INST_LD;
218 break;
219 case OPCODE_TXP:
220 fp->inst[counter].inst1 |= R500_TEX_INST_PROJ;
221 }
222
223 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc)
224 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
225 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
226 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
227 | R500_TEX_DST_ADDR(dest)
228 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
229 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
230
231
232
233 fp->inst[counter].inst3 = 0x0;
234 fp->inst[counter].inst4 = 0x0;
235 fp->inst[counter].inst5 = 0x0;
236 }
237
238 static void dumb_shader(struct r500_fragment_program *fp)
239 {
240 fp->inst[0].inst0 = R500_INST_TYPE_TEX
241 | R500_INST_TEX_SEM_WAIT
242 | R500_INST_RGB_WMASK_R
243 | R500_INST_RGB_WMASK_G
244 | R500_INST_RGB_WMASK_B
245 | R500_INST_ALPHA_WMASK
246 | R500_INST_RGB_CLAMP
247 | R500_INST_ALPHA_CLAMP;
248 fp->inst[0].inst1 = R500_TEX_ID(0)
249 | R500_TEX_INST_LD
250 | R500_TEX_SEM_ACQUIRE
251 | R500_TEX_IGNORE_UNCOVERED;
252 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
253 | R500_TEX_SRC_S_SWIZ_R
254 | R500_TEX_SRC_T_SWIZ_G
255 | R500_TEX_DST_ADDR(0)
256 | R500_TEX_DST_R_SWIZ_R
257 | R500_TEX_DST_G_SWIZ_G
258 | R500_TEX_DST_B_SWIZ_B
259 | R500_TEX_DST_A_SWIZ_A;
260 fp->inst[0].inst3 = R500_DX_ADDR(0)
261 | R500_DX_S_SWIZ_R
262 | R500_DX_T_SWIZ_R
263 | R500_DX_R_SWIZ_R
264 | R500_DX_Q_SWIZ_R
265 | R500_DY_ADDR(0)
266 | R500_DY_S_SWIZ_R
267 | R500_DY_T_SWIZ_R
268 | R500_DY_R_SWIZ_R
269 | R500_DY_Q_SWIZ_R;
270 fp->inst[0].inst4 = 0x0;
271 fp->inst[0].inst5 = 0x0;
272
273 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
274 R500_INST_TEX_SEM_WAIT |
275 R500_INST_LAST |
276 R500_INST_RGB_OMASK_R |
277 R500_INST_RGB_OMASK_G |
278 R500_INST_RGB_OMASK_B |
279 R500_INST_ALPHA_OMASK;
280 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
281 R500_RGB_ADDR1(0) |
282 R500_RGB_ADDR1_CONST |
283 R500_RGB_ADDR2(0) |
284 R500_RGB_ADDR2_CONST |
285 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
286 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
287 R500_ALPHA_ADDR1(0) |
288 R500_ALPHA_ADDR1_CONST |
289 R500_ALPHA_ADDR2(0) |
290 R500_ALPHA_ADDR2_CONST |
291 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
292 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
293 R500_ALU_RGB_R_SWIZ_A_R |
294 R500_ALU_RGB_G_SWIZ_A_G |
295 R500_ALU_RGB_B_SWIZ_A_B |
296 R500_ALU_RGB_SEL_B_SRC0 |
297 R500_ALU_RGB_R_SWIZ_B_1 |
298 R500_ALU_RGB_B_SWIZ_B_1 |
299 R500_ALU_RGB_G_SWIZ_B_1;
300 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
301 R500_ALPHA_SWIZ_A_A |
302 R500_ALPHA_SWIZ_B_1;
303 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
304 R500_ALU_RGBA_R_SWIZ_0 |
305 R500_ALU_RGBA_G_SWIZ_0 |
306 R500_ALU_RGBA_B_SWIZ_0 |
307 R500_ALU_RGBA_A_SWIZ_0;
308
309 fp->cs->nrslots = 2;
310 fp->translated = GL_TRUE;
311 }
312
/**
 * Placeholder for a shared ALU emitter; intentionally does nothing.
 */
static void emit_alu(struct r500_fragment_program *fp)
{
	(void) fp;
}
315
316 static GLboolean parse_program(struct r500_fragment_program *fp)
317 {
318 struct gl_fragment_program *mp = &fp->mesa_program;
319 const struct prog_instruction *inst = mp->Base.Instructions;
320 struct prog_instruction *fpi;
321 GLuint src[3], dest, temp[2];
322 int flags, mask, counter = 0;
323
324 if (!inst || inst[0].Opcode == OPCODE_END) {
325 ERROR("The program is empty!\n");
326 return GL_FALSE;
327 }
328
329 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
330
331 if (fpi->Opcode != OPCODE_KIL) {
332 dest = make_dest(fp, fpi->DstReg);
333 mask = fpi->DstReg.WriteMask << 11;
334 }
335
336 switch (fpi->Opcode) {
337 case OPCODE_ABS:
338 src[0] = make_src(fp, fpi->SrcReg[0]);
339 /* Variation on MOV */
340 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
341 | mask;
342 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
343 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
344 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
345 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
346 | R500_ALU_RGB_MOD_A_ABS | R500_ALU_RGB_SEL_B_SRC0
347 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
348 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
349 | R500_ALPHA_ADDRD(dest)
350 | R500_ALPHA_SEL_A_SRC0
351 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_MOD_A_ABS
352 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
353 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
354 | R500_ALU_RGBA_ADDRD(dest);
355 break;
356 case OPCODE_ADD:
357 src[0] = make_src(fp, fpi->SrcReg[0]);
358 src[1] = make_src(fp, fpi->SrcReg[1]);
359 /* Variation on MAD: 1*src0+src1 */
360 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
361 | mask;
362 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
363 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(0);
364 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
365 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(0);
366 fp->inst[counter].inst3 = /* 1 */
367 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
368 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
369 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
370 | R500_ALPHA_ADDRD(dest)
371 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
372 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
373 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
374 | R500_ALU_RGBA_ADDRD(dest)
375 | R500_ALU_RGBA_SEL_C_SRC1
376 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
377 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
378 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
379 break;
380 case OPCODE_DP3:
381 src[0] = make_src(fp, fpi->SrcReg[0]);
382 src[1] = make_src(fp, fpi->SrcReg[1]);
383 src[2] = make_src(fp, fpi->SrcReg[2]);
384 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
385 | mask;
386 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
387 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
388 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
389 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
390 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
391 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
392 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
393 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
394 | R500_ALPHA_ADDRD(dest)
395 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
396 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
397 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
398 | R500_ALU_RGBA_ADDRD(dest)
399 | R500_ALU_RGBA_SEL_C_SRC2
400 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
401 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
402 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
403 break;
404 case OPCODE_DP4:
405 src[0] = make_src(fp, fpi->SrcReg[0]);
406 src[1] = make_src(fp, fpi->SrcReg[1]);
407 src[2] = make_src(fp, fpi->SrcReg[2]);
408 /* Based on DP3 */
409 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
410 | mask;
411 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
412 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
413 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
414 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
415 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
416 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
417 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
418 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
419 | R500_ALPHA_ADDRD(dest)
420 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
421 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
422 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
423 | R500_ALU_RGBA_ADDRD(dest)
424 | R500_ALU_RGBA_SEL_C_SRC2
425 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
426 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
427 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
428 break;
429 case OPCODE_MAD:
430 src[0] = make_src(fp, fpi->SrcReg[0]);
431 src[1] = make_src(fp, fpi->SrcReg[1]);
432 src[2] = make_src(fp, fpi->SrcReg[2]);
433 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
434 | mask;
435 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
436 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
437 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
438 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
439 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
440 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
441 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
442 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
443 | R500_ALPHA_ADDRD(dest)
444 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
445 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
446 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
447 | R500_ALU_RGBA_ADDRD(dest)
448 | R500_ALU_RGBA_SEL_C_SRC2
449 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
450 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
451 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
452 break;
453 case OPCODE_MAX:
454 src[0] = make_src(fp, fpi->SrcReg[0]);
455 src[1] = make_src(fp, fpi->SrcReg[0]);
456 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask;
457 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
458 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
459 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
460 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
461 | R500_ALU_RGB_SEL_B_SRC1
462 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
463 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
464 | R500_ALPHA_ADDRD(dest)
465 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
466 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
467 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
468 | R500_ALU_RGBA_ADDRD(dest);
469 break;
470 case OPCODE_MIN:
471 src[0] = make_src(fp, fpi->SrcReg[0]);
472 src[1] = make_src(fp, fpi->SrcReg[0]);
473 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask;
474 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
475 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
476 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
477 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
478 | R500_ALU_RGB_SEL_B_SRC1
479 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
480 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
481 | R500_ALPHA_ADDRD(dest)
482 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
483 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
484 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
485 | R500_ALU_RGBA_ADDRD(dest);
486 break;
487 case OPCODE_MOV:
488 src[0] = make_src(fp, fpi->SrcReg[0]);
489
490 /* changed to use MAD - not sure if we
491 ever have negative things which max will fail on */
492 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
493 | mask;
494 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
495 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
496 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
497 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB)
498 | R500_ALU_RGB_SEL_B_SRC0
499 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE);
500 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
501 | R500_ALPHA_ADDRD(dest)
502 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0
503 | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1;
504
505 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
506 | R500_ALU_RGBA_ADDRD(dest)
507 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
508 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
509 break;
510 case OPCODE_MUL:
511 src[0] = make_src(fp, fpi->SrcReg[0]);
512 src[1] = make_src(fp, fpi->SrcReg[1]);
513 /* Variation on MAD: src0*src1+0 */
514 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
515 | mask;
516 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
517 | R500_RGB_ADDR1(src[1]);
518 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
519 | R500_ALPHA_ADDR1(src[1]);
520 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
521 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
522 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
523 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
524 | R500_ALPHA_ADDRD(dest)
525 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
526 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
527 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
528 | R500_ALU_RGBA_ADDRD(dest)
529 // | R500_ALU_RGBA_SEL_C_SRC2
530 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
531 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
532 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
533 break;
534 case OPCODE_SUB:
535 src[0] = make_src(fp, fpi->SrcReg[0]);
536 src[1] = make_src(fp, fpi->SrcReg[1]);
537 /* Variation on MAD: 1*src0-src1 */
538 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
539 | mask;
540 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
541 | R500_RGB_ADDR2(src[1]);
542 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
543 | R500_ALPHA_ADDR2(src[1]);
544 fp->inst[counter].inst3 = /* 1 */
545 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
546 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
547 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
548 | R500_ALPHA_ADDRD(dest)
549 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
550 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
551 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
552 | R500_ALU_RGBA_ADDRD(dest)
553 | R500_ALU_RGBA_SEL_C_SRC2
554 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
555 | R500_ALU_RGBA_MOD_C_NEG
556 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
557 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
558 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
559 break;
560 case OPCODE_TEX:
561 emit_tex(fp, fpi, OPCODE_TEX, dest, counter);
562 break;
563 case OPCODE_TXP:
564 emit_tex(fp, fpi, OPCODE_TXP, dest, counter);
565 break;
566 default:
567 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
568 break;
569 }
570
571 /* Finishing touches */
572 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
573 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
574 }
575 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
576 fp->inst[counter].inst0 |= R500_INST_TYPE_OUT
577 | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G
578 | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK;
579 }
580
581 counter++;
582
583 if (fp->error)
584 return GL_FALSE;
585
586 }
587
588 fp->cs->nrslots = counter;
589
590 /* Finish him! (If it's an output instruction...)
591 * Yes, I know it's ugly... */
592 if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) {
593 fp->inst[counter].inst0 |= R500_INST_TYPE_OUT
594 | R500_INST_TEX_SEM_WAIT | R500_INST_LAST;
595 } else {
596 /* We still need to put an output inst, right? */
597 }
598
599 fp->max_temp_idx++;
600
601 return GL_TRUE;
602 }
603
604 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
605 {
606 struct r300_pfs_compile_state *cs = NULL;
607 struct gl_fragment_program *mp = &fp->mesa_program;
608 struct prog_instruction *fpi;
609 GLuint InputsRead = mp->Base.InputsRead;
610 GLuint temps_used = 0; /* for fp->temps[] */
611 int i, j;
612
613 /* New compile, reset tracking data */
614 fp->optimization =
615 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
616 fp->translated = GL_FALSE;
617 fp->error = GL_FALSE;
618 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
619 fp->cur_node = 0;
620 fp->first_node_has_tex = 0;
621 fp->const_nr = 0;
622 fp->max_temp_idx = 0;
623 fp->node[0].alu_end = -1;
624 fp->node[0].tex_end = -1;
625
626 _mesa_memset(cs, 0, sizeof(*fp->cs));
627 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
628 for (j = 0; j < 3; j++) {
629 cs->slot[i].vsrc[j] = SRC_CONST;
630 cs->slot[i].ssrc[j] = SRC_CONST;
631 }
632 }
633
634 /* Work out what temps the Mesa inputs correspond to, this must match
635 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
636 * configures itself based on the fragprog's InputsRead
637 *
638 * NOTE: this depends on get_hw_temp() allocating registers in order,
639 * starting from register 0.
640 */
641
642 #if 0
643 /* Texcoords come first */
644 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
645 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
646 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
647 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
648 get_hw_temp(fp, 0);
649 }
650 }
651 InputsRead &= ~FRAG_BITS_TEX_ANY;
652
653 /* fragment position treated as a texcoord */
654 if (InputsRead & FRAG_BIT_WPOS) {
655 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
656 cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0);
657 insert_wpos(&mp->Base);
658 }
659 InputsRead &= ~FRAG_BIT_WPOS;
660
661 /* Then primary colour */
662 if (InputsRead & FRAG_BIT_COL0) {
663 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
664 cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0);
665 }
666 InputsRead &= ~FRAG_BIT_COL0;
667
668 /* Secondary color */
669 if (InputsRead & FRAG_BIT_COL1) {
670 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
671 cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0);
672 }
673 InputsRead &= ~FRAG_BIT_COL1;
674
675 /* Anything else */
676 if (InputsRead) {
677 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
678 /* force read from hwreg 0 for now */
679 for (i = 0; i < 32; i++)
680 if (InputsRead & (1 << i))
681 cs->inputs[i].reg = 0;
682 }
683 #endif
684
685 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
686 * That way, we can free up the reg when it's no longer needed
687 */
688 if (!mp->Base.Instructions) {
689 ERROR("No instructions found in program\n");
690 return;
691 }
692
693 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
694 int idx;
695
696 for (i = 0; i < 3; i++) {
697 idx = fpi->SrcReg[i].Index;
698 switch (fpi->SrcReg[i].File) {
699 case PROGRAM_TEMPORARY:
700 if (!(temps_used & (1 << idx))) {
701 cs->temps[idx].reg = -1;
702 cs->temps[idx].refcount = 1;
703 temps_used |= (1 << idx);
704 } else
705 cs->temps[idx].refcount++;
706 break;
707 case PROGRAM_INPUT:
708 cs->inputs[idx].refcount++;
709 break;
710 default:
711 break;
712 }
713 }
714
715 idx = fpi->DstReg.Index;
716 if (fpi->DstReg.File == PROGRAM_TEMPORARY) {
717 if (!(temps_used & (1 << idx))) {
718 cs->temps[idx].reg = -1;
719 cs->temps[idx].refcount = 1;
720 temps_used |= (1 << idx);
721 } else
722 cs->temps[idx].refcount++;
723 }
724 }
725 cs->temp_in_use = temps_used;
726 }
727
728 static void update_params(struct r500_fragment_program *fp)
729 {
730 struct gl_fragment_program *mp = &fp->mesa_program;
731
732 /* Ask Mesa nicely to fill in ParameterValues for us */
733 if (mp->Base.Parameters)
734 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
735 }
736
737 void r500TranslateFragmentShader(r300ContextPtr r300,
738 struct r500_fragment_program *fp)
739 {
740
741 struct r300_pfs_compile_state *cs = NULL;
742
743 if (!fp->translated) {
744
745 /* I need to see what I'm working with! */
746 fprintf(stderr, "Mesa program:\n");
747 fprintf(stderr, "-------------\n");
748 _mesa_print_program(&fp->mesa_program.Base);
749 fflush(stdout);
750
751 init_program(r300, fp);
752 cs = fp->cs;
753
754 if (parse_program(fp) == GL_FALSE) {
755 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
756 dumb_shader(fp);
757 return;
758 }
759
760 /* Finish off */
761 fp->node[fp->cur_node].alu_end =
762 cs->nrslots - fp->node[fp->cur_node].alu_offset - 1;
763 if (fp->node[fp->cur_node].tex_end < 0)
764 fp->node[fp->cur_node].tex_end = 0;
765 fp->alu_offset = 0;
766 fp->alu_end = cs->nrslots - 1;
767 //assert(fp->node[fp->cur_node].alu_end >= 0);
768 //assert(fp->alu_end >= 0);
769
770 fp->translated = GL_TRUE;
771 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
772 }
773
774 update_params(fp);
775 }