r500: make tri-param work
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * \author Ben Skeggs <darktama@iinet.net.au>
32 *
33 * \author Jerome Glisse <j.glisse@gmail.com>
34 *
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
36 *
37 * \todo Depth write, WPOS/FOGC inputs
38 *
39 * \todo FogOption
40 *
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
42 * specific cases.
43 */
44
45 #include "glheader.h"
46 #include "macros.h"
47 #include "enums.h"
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
51
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
54 #include "r300_reg.h"
55 #include "r300_state.h"
56
57 /*
58 * Useful macros and values
59 */
/*
 * Report a translation error on stderr, prefixed with file and function,
 * and mark the program being compiled as failed.
 *
 * Expects a variable named `fp` (with an `error` member) in the caller's
 * scope. A newline is appended to the message automatically.
 */
#define ERROR(fmt, args...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##args); \
		fp->error = GL_TRUE; \
	} while(0)

/* Declares a local `cs` aliasing the compile state of the `fp` in scope. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/* Swizzle tools */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* Three-channel (RGB) swizzles, 3 bits per channel. */
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))

/*
 * The MAKE_SWIZ_* helpers shift an already-packed swizzle into position
 * inside an instruction word. The argument is parenthesized so that an
 * expression such as `a | b` is shifted as a whole.
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
91
92 static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
93 GLuint swiz = 0x0;
94 GLuint temp;
95 /* This could be optimized, but it should be plenty fast already. */
96 int i;
97 for (i = 0; i < 3; i++) {
98 temp = (src.Swizzle >> i*3) & 0x7;
99 /* Fix SWIZZLE_ONE */
100 if (temp == 5) temp++;
101 swiz += temp << i*3;
102 }
103 return swiz;
104 }
105
106 static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
107 GLuint swiz = (src.Swizzle >> 12) & 0x7;
108 if (swiz == 5) swiz++;
109 return swiz;
110 }
111
112 static inline GLuint make_strq_swizzle(struct prog_src_register src) {
113 GLuint swiz = 0x0;
114 GLuint temp = src.Swizzle;
115 int i;
116 for (i = 0; i < 4; i++) {
117 swiz += (temp & 0x3) << i*2;
118 temp >>= 3;
119 }
120 return swiz;
121 }
122
123 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
124 static GLuint emit_const4fv(struct r500_fragment_program *fp,
125 const GLfloat * cp)
126 {
127 GLuint reg = 0x0;
128 int index;
129
130 for (index = 0; index < fp->const_nr; ++index) {
131 if (fp->constant[index] == cp)
132 break;
133 }
134
135 if (index >= fp->const_nr) {
136 /* TODO: This should be r5xx nums, not r300 */
137 if (index >= PFS_NUM_CONST_REGS) {
138 ERROR("Out of hw constants!\n");
139 return reg;
140 }
141
142 fp->const_nr++;
143 fp->constant[index] = cp;
144 }
145
146 reg = index | REG_CONSTANT;
147 return reg;
148 }
149
150 static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) {
151 GLuint reg;
152 switch (src.File) {
153 case PROGRAM_TEMPORARY:
154 reg = (src.Index << 0x1) | 0x1;
155 break;
156 case PROGRAM_INPUT:
157 /* Ugly hack needed to work around Mesa;
158 * fragments don't get loaded right otherwise! */
159 reg = 0x0;
160 break;
161 case PROGRAM_STATE_VAR:
162 case PROGRAM_NAMED_PARAM:
163 case PROGRAM_CONSTANT:
164 reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters->
165 ParameterValues[src.Index]);
166 break;
167 default:
168 ERROR("Can't handle src.File %x\n", src.File);
169 reg = 0x0;
170 break;
171 }
172 return reg;
173 }
174
175 static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) {
176 GLuint reg;
177 switch (dest.File) {
178 case PROGRAM_TEMPORARY:
179 reg = (dest.Index << 0x1) | 0x1;
180 break;
181 case PROGRAM_OUTPUT:
182 /* Eventually we may need to handle multiple
183 * rendering targets... */
184 reg = dest.Index;
185 break;
186 default:
187 ERROR("Can't handle dest.File %x\n", dest.File);
188 reg = 0x0;
189 break;
190 }
191 return reg;
192 }
193
194 static void dumb_shader(struct r500_fragment_program *fp)
195 {
196 fp->inst[0].inst0 = R500_INST_TYPE_TEX
197 | R500_INST_TEX_SEM_WAIT
198 | R500_INST_RGB_WMASK_R
199 | R500_INST_RGB_WMASK_G
200 | R500_INST_RGB_WMASK_B
201 | R500_INST_ALPHA_WMASK
202 | R500_INST_RGB_CLAMP
203 | R500_INST_ALPHA_CLAMP;
204 fp->inst[0].inst1 = R500_TEX_ID(0)
205 | R500_TEX_INST_LD
206 | R500_TEX_SEM_ACQUIRE
207 | R500_TEX_IGNORE_UNCOVERED;
208 fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0)
209 | R500_TEX_SRC_S_SWIZ_R
210 | R500_TEX_SRC_T_SWIZ_G
211 | R500_TEX_DST_ADDR(0)
212 | R500_TEX_DST_R_SWIZ_R
213 | R500_TEX_DST_G_SWIZ_G
214 | R500_TEX_DST_B_SWIZ_B
215 | R500_TEX_DST_A_SWIZ_A;
216 fp->inst[0].inst3 = R500_DX_ADDR(0)
217 | R500_DX_S_SWIZ_R
218 | R500_DX_T_SWIZ_R
219 | R500_DX_R_SWIZ_R
220 | R500_DX_Q_SWIZ_R
221 | R500_DY_ADDR(0)
222 | R500_DY_S_SWIZ_R
223 | R500_DY_T_SWIZ_R
224 | R500_DY_R_SWIZ_R
225 | R500_DY_Q_SWIZ_R;
226 fp->inst[0].inst4 = 0x0;
227 fp->inst[0].inst5 = 0x0;
228
229 fp->inst[1].inst0 = R500_INST_TYPE_OUT |
230 R500_INST_TEX_SEM_WAIT |
231 R500_INST_LAST |
232 R500_INST_RGB_OMASK_R |
233 R500_INST_RGB_OMASK_G |
234 R500_INST_RGB_OMASK_B |
235 R500_INST_ALPHA_OMASK;
236 fp->inst[1].inst1 = R500_RGB_ADDR0(0) |
237 R500_RGB_ADDR1(0) |
238 R500_RGB_ADDR1_CONST |
239 R500_RGB_ADDR2(0) |
240 R500_RGB_ADDR2_CONST |
241 R500_RGB_SRCP_OP_1_MINUS_2RGB0;
242 fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) |
243 R500_ALPHA_ADDR1(0) |
244 R500_ALPHA_ADDR1_CONST |
245 R500_ALPHA_ADDR2(0) |
246 R500_ALPHA_ADDR2_CONST |
247 R500_ALPHA_SRCP_OP_1_MINUS_2A0;
248 fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 |
249 R500_ALU_RGB_R_SWIZ_A_R |
250 R500_ALU_RGB_G_SWIZ_A_G |
251 R500_ALU_RGB_B_SWIZ_A_B |
252 R500_ALU_RGB_SEL_B_SRC0 |
253 R500_ALU_RGB_R_SWIZ_B_1 |
254 R500_ALU_RGB_B_SWIZ_B_1 |
255 R500_ALU_RGB_G_SWIZ_B_1;
256 fp->inst[1].inst4 = R500_ALPHA_OP_MAD |
257 R500_ALPHA_SWIZ_A_A |
258 R500_ALPHA_SWIZ_B_1;
259 fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD |
260 R500_ALU_RGBA_R_SWIZ_0 |
261 R500_ALU_RGBA_G_SWIZ_0 |
262 R500_ALU_RGBA_B_SWIZ_0 |
263 R500_ALU_RGBA_A_SWIZ_0;
264
265 fp->cs->nrslots = 2;
266 fp->translated = GL_TRUE;
267 }
268
/* Placeholder for a shared ALU emitter; currently does nothing. */
static void emit_alu(struct r500_fragment_program *fp)
{
}
271
272 static GLboolean parse_program(struct r500_fragment_program *fp)
273 {
274 struct gl_fragment_program *mp = &fp->mesa_program;
275 const struct prog_instruction *inst = mp->Base.Instructions;
276 struct prog_instruction *fpi;
277 GLuint src[3], dest, temp[2];
278 int flags, mask, counter = 0;
279
280 if (!inst || inst[0].Opcode == OPCODE_END) {
281 ERROR("The program is empty!\n");
282 return GL_FALSE;
283 }
284
285 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
286
287 if (fpi->Opcode != OPCODE_KIL) {
288 dest = make_dest(fp, fpi->DstReg);
289 mask = fpi->DstReg.WriteMask << 11;
290 }
291
292 switch (fpi->Opcode) {
293 case OPCODE_ABS:
294 src[0] = make_src(fp, fpi->SrcReg[0]);
295 /* Variation on MOV */
296 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
297 | mask;
298 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
299 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
300 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
301 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
302 | R500_ALU_RGB_MOD_A_ABS | R500_ALU_RGB_SEL_B_SRC0
303 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
304 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
305 | R500_ALPHA_ADDRD(dest)
306 | R500_ALPHA_SEL_A_SRC0
307 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) | R500_ALPHA_MOD_A_ABS
308 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
309 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
310 | R500_ALU_RGBA_ADDRD(dest);
311 break;
312 case OPCODE_ADD:
313 src[0] = make_src(fp, fpi->SrcReg[0]);
314 src[1] = make_src(fp, fpi->SrcReg[1]);
315 /* Variation on MAD: 1*src0+src1 */
316 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
317 | mask;
318 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
319 | R500_RGB_ADDR1(src[1]);
320 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
321 | R500_ALPHA_ADDR1(src[1]);
322 fp->inst[counter].inst3 = /* 1 */
323 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
324 | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
325 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
326 | R500_ALPHA_ADDRD(dest)
327 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
328 | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
329 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
330 | R500_ALU_RGBA_ADDRD(dest)
331 | R500_ALU_RGBA_SEL_C_SRC1
332 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
333 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
334 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]));
335 break;
336 case OPCODE_DP3:
337 src[0] = make_src(fp, fpi->SrcReg[0]);
338 src[1] = make_src(fp, fpi->SrcReg[1]);
339 src[2] = make_src(fp, fpi->SrcReg[2]);
340 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
341 | mask;
342 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
343 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
344 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
345 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
346 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
347 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
348 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
349 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
350 | R500_ALPHA_ADDRD(dest)
351 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
352 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
353 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3
354 | R500_ALU_RGBA_ADDRD(dest)
355 | R500_ALU_RGBA_SEL_C_SRC2
356 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
357 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
358 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
359 break;
360 case OPCODE_DP4:
361 src[0] = make_src(fp, fpi->SrcReg[0]);
362 src[1] = make_src(fp, fpi->SrcReg[1]);
363 src[2] = make_src(fp, fpi->SrcReg[2]);
364 /* Based on DP3 */
365 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
366 | mask;
367 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
368 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
369 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
370 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
371 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
372 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
373 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
374 fp->inst[counter].inst4 = R500_ALPHA_OP_DP
375 | R500_ALPHA_ADDRD(dest)
376 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
377 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
378 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4
379 | R500_ALU_RGBA_ADDRD(dest)
380 | R500_ALU_RGBA_SEL_C_SRC2
381 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
382 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
383 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
384 break;
385 case OPCODE_MAD:
386 src[0] = make_src(fp, fpi->SrcReg[0]);
387 src[1] = make_src(fp, fpi->SrcReg[1]);
388 src[2] = make_src(fp, fpi->SrcReg[2]);
389 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
390 | mask;
391 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
392 | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[2]);
393 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
394 | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[2]);
395 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
396 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
397 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
398 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
399 | R500_ALPHA_ADDRD(dest)
400 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
401 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
402 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
403 | R500_ALU_RGBA_ADDRD(dest)
404 | R500_ALU_RGBA_SEL_C_SRC2
405 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2]))
406 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
407 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2]));
408 break;
409 case OPCODE_MAX:
410 src[0] = make_src(fp, fpi->SrcReg[0]);
411 src[1] = make_src(fp, fpi->SrcReg[0]);
412 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask;
413 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
414 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
415 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
416 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
417 | R500_ALU_RGB_SEL_B_SRC1
418 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
419 fp->inst[counter].inst4 = R500_ALPHA_OP_MAX
420 | R500_ALPHA_ADDRD(dest)
421 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
422 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
423 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX
424 | R500_ALU_RGBA_ADDRD(dest);
425 break;
426 case OPCODE_MIN:
427 src[0] = make_src(fp, fpi->SrcReg[0]);
428 src[1] = make_src(fp, fpi->SrcReg[0]);
429 fp->inst[counter].inst0 = R500_INST_TYPE_ALU | mask;
430 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]);
431 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]);
432 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
433 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
434 | R500_ALU_RGB_SEL_B_SRC1
435 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
436 fp->inst[counter].inst4 = R500_ALPHA_OP_MIN
437 | R500_ALPHA_ADDRD(dest)
438 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
439 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
440 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN
441 | R500_ALU_RGBA_ADDRD(dest);
442 break;
443 case OPCODE_MOV:
444 src[0] = make_src(fp, fpi->SrcReg[0]);
445
446 /* changed to use MAD - not sure if we
447 ever have negative things which max will fail on */
448 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
449 | mask;
450 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]);
451 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]);
452 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
453 | R500_ALU_RGB_R_SWIZ_A_R | R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B
454 | R500_ALU_RGB_SEL_B_SRC0
455 | R500_ALU_RGB_R_SWIZ_B_1 | R500_ALU_RGB_G_SWIZ_B_1 | R500_ALU_RGB_B_SWIZ_B_1;
456
457 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
458 | R500_ALPHA_ADDRD(dest)
459 | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0
460 | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1;
461
462 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
463 | R500_ALU_RGBA_ADDRD(dest)
464 | R500_ALU_RGBA_R_SWIZ_0 | R500_ALU_RGBA_G_SWIZ_0
465 | R500_ALU_RGBA_B_SWIZ_0 | R500_ALU_RGBA_A_SWIZ_0;
466 break;
467 case OPCODE_MUL:
468 src[0] = make_src(fp, fpi->SrcReg[0]);
469 src[1] = make_src(fp, fpi->SrcReg[1]);
470 /* Variation on MAD: src0*src1+0 */
471 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
472 | mask;
473 fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0])
474 | R500_RGB_ADDR1(src[1]);
475 fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0])
476 | R500_ALPHA_ADDR1(src[1]);
477 fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0
478 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0]))
479 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1]));
480 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
481 | R500_ALPHA_ADDRD(dest)
482 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0]))
483 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1]));
484 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
485 | R500_ALU_RGBA_ADDRD(dest)
486 // | R500_ALU_RGBA_SEL_C_SRC2
487 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO)
488 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
489 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO);
490 break;
491 case OPCODE_SUB:
492 src[0] = make_src(fp, fpi->SrcReg[0]);
493 src[1] = make_src(fp, fpi->SrcReg[1]);
494 /* Variation on MAD: 1*src0-src1 */
495 fp->inst[counter].inst0 = R500_INST_TYPE_ALU
496 | mask;
497 fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0])
498 | R500_RGB_ADDR2(src[1]);
499 fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0])
500 | R500_ALPHA_ADDR2(src[1]);
501 fp->inst[counter].inst3 = /* 1 */
502 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE)
503 | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0]));
504 fp->inst[counter].inst4 = R500_ALPHA_OP_MAD
505 | R500_ALPHA_ADDRD(dest)
506 | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE)
507 | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0]));
508 fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD
509 | R500_ALU_RGBA_ADDRD(dest)
510 | R500_ALU_RGBA_SEL_C_SRC2
511 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1]))
512 | R500_ALU_RGBA_MOD_C_NEG
513 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
514 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1]))
515 | R500_ALU_RGBA_ALPHA_MOD_C_NEG;
516 break;
517 case OPCODE_TEX:
518 src[0] = make_src(fp, fpi->SrcReg[0]);
519 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask
520 | R500_INST_TEX_SEM_WAIT;
521 fp->inst[counter].inst1 = fpi->TexSrcUnit
522 | R500_TEX_INST_LD | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
523 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0])
524 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
525 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
526 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
527 | R500_TEX_DST_ADDR(dest)
528 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
529 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
530 fp->inst[counter].inst3 = 0x0;
531 fp->inst[counter].inst4 = 0x0;
532 fp->inst[counter].inst5 = 0x0;
533 break;
534 case OPCODE_TXP:
535 src[0] = make_src(fp, fpi->SrcReg[0]);
536 fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask;
537 fp->inst[counter].inst1 = fpi->TexSrcUnit
538 | R500_TEX_INST_PROJ | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
539 fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(src[0])
540 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
541 | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G
542 | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A
543 | R500_TEX_DST_ADDR(dest)
544 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
545 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
546 fp->inst[counter].inst3 = 0x0;
547 fp->inst[counter].inst4 = 0x0;
548 fp->inst[counter].inst5 = 0x0;
549 break;
550 default:
551 ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
552 break;
553 }
554
555 /* Finishing touches */
556 if (fpi->SaturateMode == SATURATE_ZERO_ONE) {
557 fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP;
558 }
559 if (fpi->DstReg.File == PROGRAM_OUTPUT) {
560 fp->inst[counter].inst0 |= R500_INST_TYPE_OUT
561 | R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G
562 | R500_INST_RGB_OMASK_B | R500_INST_ALPHA_OMASK;
563 }
564
565 counter++;
566
567 if (fp->error)
568 return GL_FALSE;
569
570 }
571
572 fp->cs->nrslots = counter;
573
574 /* Finish him! (If it's an output instruction...)
575 * Yes, I know it's ugly... */
576 if ((fp->inst[counter].inst0 & 0x3) ^ 0x2) {
577 fp->inst[counter].inst0 |= R500_INST_TYPE_OUT
578 | R500_INST_TEX_SEM_WAIT | R500_INST_LAST;
579 }
580
581 return GL_TRUE;
582 }
583
584 static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp)
585 {
586 struct r300_pfs_compile_state *cs = NULL;
587 struct gl_fragment_program *mp = &fp->mesa_program;
588 struct prog_instruction *fpi;
589 GLuint InputsRead = mp->Base.InputsRead;
590 GLuint temps_used = 0; /* for fp->temps[] */
591 int i, j;
592
593 /* New compile, reset tracking data */
594 fp->optimization =
595 driQueryOptioni(&r300->radeon.optionCache, "fp_optimization");
596 fp->translated = GL_FALSE;
597 fp->error = GL_FALSE;
598 fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile);
599 fp->cur_node = 0;
600 fp->first_node_has_tex = 0;
601 fp->const_nr = 0;
602 fp->max_temp_idx = 64;
603 fp->node[0].alu_end = -1;
604 fp->node[0].tex_end = -1;
605
606 _mesa_memset(cs, 0, sizeof(*fp->cs));
607 for (i = 0; i < PFS_MAX_ALU_INST; i++) {
608 for (j = 0; j < 3; j++) {
609 cs->slot[i].vsrc[j] = SRC_CONST;
610 cs->slot[i].ssrc[j] = SRC_CONST;
611 }
612 }
613
614 /* Work out what temps the Mesa inputs correspond to, this must match
615 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
616 * configures itself based on the fragprog's InputsRead
617 *
618 * NOTE: this depends on get_hw_temp() allocating registers in order,
619 * starting from register 0.
620 */
621
622 #if 0
623 /* Texcoords come first */
624 for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) {
625 if (InputsRead & (FRAG_BIT_TEX0 << i)) {
626 cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0;
627 cs->inputs[FRAG_ATTRIB_TEX0 + i].reg =
628 get_hw_temp(fp, 0);
629 }
630 }
631 InputsRead &= ~FRAG_BITS_TEX_ANY;
632
633 /* fragment position treated as a texcoord */
634 if (InputsRead & FRAG_BIT_WPOS) {
635 cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0;
636 cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0);
637 insert_wpos(&mp->Base);
638 }
639 InputsRead &= ~FRAG_BIT_WPOS;
640
641 /* Then primary colour */
642 if (InputsRead & FRAG_BIT_COL0) {
643 cs->inputs[FRAG_ATTRIB_COL0].refcount = 0;
644 cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0);
645 }
646 InputsRead &= ~FRAG_BIT_COL0;
647
648 /* Secondary color */
649 if (InputsRead & FRAG_BIT_COL1) {
650 cs->inputs[FRAG_ATTRIB_COL1].refcount = 0;
651 cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0);
652 }
653 InputsRead &= ~FRAG_BIT_COL1;
654
655 /* Anything else */
656 if (InputsRead) {
657 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
658 /* force read from hwreg 0 for now */
659 for (i = 0; i < 32; i++)
660 if (InputsRead & (1 << i))
661 cs->inputs[i].reg = 0;
662 }
663 #endif
664
665 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
666 * That way, we can free up the reg when it's no longer needed
667 */
668 if (!mp->Base.Instructions) {
669 ERROR("No instructions found in program\n");
670 return;
671 }
672
673 for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) {
674 int idx;
675
676 for (i = 0; i < 3; i++) {
677 idx = fpi->SrcReg[i].Index;
678 switch (fpi->SrcReg[i].File) {
679 case PROGRAM_TEMPORARY:
680 if (!(temps_used & (1 << idx))) {
681 cs->temps[idx].reg = -1;
682 cs->temps[idx].refcount = 1;
683 temps_used |= (1 << idx);
684 } else
685 cs->temps[idx].refcount++;
686 break;
687 case PROGRAM_INPUT:
688 cs->inputs[idx].refcount++;
689 break;
690 default:
691 break;
692 }
693 }
694
695 idx = fpi->DstReg.Index;
696 if (fpi->DstReg.File == PROGRAM_TEMPORARY) {
697 if (!(temps_used & (1 << idx))) {
698 cs->temps[idx].reg = -1;
699 cs->temps[idx].refcount = 1;
700 temps_used |= (1 << idx);
701 } else
702 cs->temps[idx].refcount++;
703 }
704 }
705 cs->temp_in_use = temps_used;
706 }
707
708 static void update_params(struct r500_fragment_program *fp)
709 {
710 struct gl_fragment_program *mp = &fp->mesa_program;
711
712 /* Ask Mesa nicely to fill in ParameterValues for us */
713 if (mp->Base.Parameters)
714 _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters);
715 }
716
717 void r500TranslateFragmentShader(r300ContextPtr r300,
718 struct r500_fragment_program *fp)
719 {
720
721 struct r300_pfs_compile_state *cs = NULL;
722
723 if (!fp->translated) {
724
725 /* I need to see what I'm working with! */
726 fprintf(stderr, "Mesa program:\n");
727 fprintf(stderr, "-------------\n");
728 _mesa_print_program(&fp->mesa_program.Base);
729 fflush(stdout);
730
731 init_program(r300, fp);
732 cs = fp->cs;
733
734 if (parse_program(fp) == GL_FALSE) {
735 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
736 dumb_shader(fp);
737 return;
738 }
739
740 /* Finish off */
741 fp->node[fp->cur_node].alu_end =
742 cs->nrslots - fp->node[fp->cur_node].alu_offset - 1;
743 if (fp->node[fp->cur_node].tex_end < 0)
744 fp->node[fp->cur_node].tex_end = 0;
745 fp->alu_offset = 0;
746 fp->alu_end = cs->nrslots - 1;
747 //assert(fp->node[fp->cur_node].alu_end >= 0);
748 //assert(fp->alu_end >= 0);
749
750 fp->translated = GL_TRUE;
751 r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM);
752 }
753
754 update_params(fp);
755 }