/*
 * Copyright (C) 2005 Ben Skeggs.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/**
 * \author Ben Skeggs <darktama@iinet.net.au>
 *
 * \author Jerome Glisse <j.glisse@gmail.com>
 *
 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
 *
 * \todo Depth write, WPOS/FOGC inputs
 *
 * \todo Verify results of opcodes for accuracy, I've only checked them in
 */
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
/*
 * Useful macros and values
 */

/*
 * Report a compile error on stderr, prefixed with the current source file
 * and function name, and flag the program being compiled as failed.
 *
 * fmt  - printf-style format string literal (it is pasted between two
 *        other literals, so it must be a literal, not a variable).
 * args - optional arguments for fmt.
 *
 * The macro expects a variable named `fp` in the caller's scope whose
 * `error` member can be assigned GL_TRUE.  The body is wrapped in
 * do { ... } while (0) so the macro expands to exactly one statement and
 * stays safe inside an unbraced if/else.
 */
#define ERROR(fmt, args...) do {				\
		fprintf(stderr, "%s::%s(): " fmt "\n",		\
			__FILE__, __FUNCTION__, ##args);	\
		fp->error = GL_TRUE;				\
	} while (0)
/* Declares `cs`, a local alias for the compile state reachable from `fp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs

/* "Register" flags */
#define REG_CONSTANT (1 << 8)
#define REG_SRC_REL (1 << 9)
#define REG_DEST_REL (1 << 7)

/*
 * Swizzle selector values, and ready-made three-component RGB swizzles
 * built from 3-bit selectors at bit offsets 0, 3 and 6.
 */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))

/*
 * The MAKE_SWIZ_* helpers move a swizzle value to its position inside an
 * instruction word.  The argument is parenthesized so that expressions
 * such as `a | b` are shifted as a whole ('<<' binds tighter than '|').
 */
/* Swizzles for inst2 */
#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
93 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
96 /* This could be optimized, but it should be plenty fast already. */
98 for (i
= 0; i
< 3; i
++) {
99 temp
= (src
.Swizzle
>> i
*3) & 0x7;
100 /* Fix SWIZZLE_ONE */
101 if (temp
== 5) temp
++;
107 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
108 GLuint swiz
= (src
.Swizzle
>> 12) & 0x7;
109 if (swiz
== 5) swiz
++;
113 static inline GLuint
make_strq_swizzle(struct prog_src_register src
) {
115 GLuint temp
= src
.Swizzle
;
117 for (i
= 0; i
< 4; i
++) {
118 swiz
+= (temp
& 0x3) << i
*2;
/*
 * emit_const4fv: find, or register, the constant pointer `cp` in
 * fp->constant[] and build a register value tagged with REG_CONSTANT.
 *
 * Visible behaviour: entries fp->constant[0 .. fp->const_nr) are compared
 * against cp by pointer value.  When the index ends up at or beyond
 * fp->const_nr, it is checked against PFS_NUM_CONST_REGS ("Out of hw
 * constants!" is reported when that limit is reached) and cp is stored in
 * fp->constant[index].  The register value is `index | REG_CONSTANT`.
 *
 * NOTE(review): this listing has lost lines -- the rest of the parameter
 * list (the declaration of `cp`), the local declarations, the statement
 * executed when a match is found, any update of fp->const_nr and the
 * return statement are not shown.  Confirm them against the full file.
 */
124 /* Borrowed verbatim from r300_fragprog since it hasn't changed. */
125 static GLuint
emit_const4fv(struct r500_fragment_program
*fp
,
131 for (index
= 0; index
< fp
->const_nr
; ++index
) {
132 if (fp
->constant
[index
] == cp
)
136 if (index
>= fp
->const_nr
) {
137 /* TODO: This should be r5xx nums, not r300 */
138 if (index
>= PFS_NUM_CONST_REGS
) {
139 ERROR("Out of hw constants!\n");
144 fp
->constant
[index
] = cp
;
147 reg
= index
| REG_CONSTANT
;
/*
 * make_src: turn a Mesa source register into the register value that
 * parse_program() and emit_tex() place in the source-address fields.
 *
 * Visible cases:
 *   - PROGRAM_TEMPORARY: (src.Index << 1) | 1.
 *   - PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM, PROGRAM_CONSTANT: the
 *     parameter value at src.Index in the program's parameter list is
 *     handed to emit_const4fv().
 *   - anything else: ERROR("Can't handle src.File ...").
 *
 * NOTE(review): the switch line, the case that the "Ugly hack" comment
 * belongs to, the break statements and the return are missing from this
 * listing.  Presumably the switch is on src.File (the error message names
 * it) -- confirm against the full file.
 */
151 static GLuint
make_src(struct r500_fragment_program
*fp
, struct prog_src_register src
) {
154 case PROGRAM_TEMPORARY
:
155 reg
= (src
.Index
<< 0x1) | 0x1;
158 /* Ugly hack needed to work around Mesa;
159 * fragments don't get loaded right otherwise! */
162 case PROGRAM_STATE_VAR
:
163 case PROGRAM_NAMED_PARAM
:
164 case PROGRAM_CONSTANT
:
165 reg
= emit_const4fv(fp
, fp
->mesa_program
.Base
.Parameters
->
166 ParameterValues
[src
.Index
]);
169 ERROR("Can't handle src.File %x\n", src
.File
);
/*
 * make_dest: turn a Mesa destination register into the register value
 * that parse_program() places in the destination-address fields.
 *
 * Visible cases:
 *   - PROGRAM_TEMPORARY: (dest.Index << 1) | 1, the same mapping make_src()
 *     uses for temporaries.
 *   - anything else: ERROR("Can't handle dest.File ...").
 *
 * NOTE(review): the switch line, the case that the "multiple rendering
 * targets" comment belongs to, the break statements and the return are
 * missing from this listing -- confirm against the full file.
 */
176 static GLuint
make_dest(struct r500_fragment_program
*fp
, struct prog_dst_register dest
) {
179 case PROGRAM_TEMPORARY
:
180 reg
= (dest
.Index
<< 0x1) | 0x1;
183 /* Eventually we may need to handle multiple
184 * rendering targets... */
188 ERROR("Can't handle dest.File %x\n", dest
.File
);
/*
 * emit_tex: encode one texture instruction into fp->inst[counter].
 *
 * fp      - program being compiled; receives the six instruction words.
 * fpi     - Mesa instruction supplying the write mask, the first source
 *           register and the texture unit.
 * opcode  - Mesa opcode; parse_program() passes OPCODE_TEX or OPCODE_TXP.
 * dest    - destination register value, placed with R500_TEX_DST_ADDR().
 * counter - index of the slot in fp->inst[] to fill.
 *
 * inst0 gets the TEX type, the write mask (WriteMask << 11) and the
 * semaphore-wait flag; inst1 gets the texture unit plus the acquire and
 * ignore-uncovered flags; inst2 gets the source and destination addresses
 * with identity swizzles; inst3..inst5 are cleared.
 *
 * NOTE(review): the lines that choose between R500_TEX_INST_LD and
 * R500_TEX_INST_PROJ are missing from this listing; presumably the choice
 * depends on `opcode` -- confirm against the full file.
 * NOTE(review): fpi->TexSrcUnit is OR'ed in directly here, whereas
 * dumb_shader() wraps its texture id in R500_TEX_ID() -- confirm that the
 * raw value is what inst1 expects.
 */
195 static void emit_tex(struct r500_fragment_program
*fp
,
196 struct prog_instruction
*fpi
, int opcode
, int dest
, int counter
)
201 mask
= fpi
->DstReg
.WriteMask
<< 11;
202 hwsrc
= make_src(fp
, fpi
->SrcReg
[0]);
204 fp
->inst
[counter
].inst0
= R500_INST_TYPE_TEX
| mask
205 | R500_INST_TEX_SEM_WAIT
;
207 fp
->inst
[counter
].inst1
= fpi
->TexSrcUnit
208 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
211 fp
->inst
[counter
].inst1
|= R500_TEX_INST_LD
;
214 fp
->inst
[counter
].inst1
|= R500_TEX_INST_PROJ
;
217 fp
->inst
[counter
].inst2
= R500_TEX_SRC_ADDR(hwsrc
)
218 /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */
219 | R500_TEX_SRC_S_SWIZ_R
| R500_TEX_SRC_T_SWIZ_G
220 | R500_TEX_SRC_R_SWIZ_B
| R500_TEX_SRC_Q_SWIZ_A
221 | R500_TEX_DST_ADDR(dest
)
222 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
223 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
227 fp
->inst
[counter
].inst3
= 0x0;
228 fp
->inst
[counter
].inst4
= 0x0;
229 fp
->inst
[counter
].inst5
= 0x0;
/*
 * dumb_shader: fill fp->inst[0] and fp->inst[1] with a fixed
 * two-instruction program and mark the program as translated.
 *
 * inst[0] is a TEX-type instruction (texture id 0, source and destination
 * address 0, RGB and alpha clamped, all four write-mask bits set).
 * inst[1] is an OUT-type instruction with all four output-mask bits set
 * that uses the MAD opcode on both the alpha and the RGBA word, with
 * source address 0.  The error message in r500TranslateFragmentShader()
 * ("Using dumb shader...") indicates this is the fallback used when
 * parse_program() fails.
 *
 * NOTE(review): several continuation lines of the instruction words are
 * absent from this listing (for example the tail of inst3 for inst[0] and
 * of inst4 for inst[1]), so the encodings are not complete as shown.
 */
232 static void dumb_shader(struct r500_fragment_program
*fp
)
234 fp
->inst
[0].inst0
= R500_INST_TYPE_TEX
235 | R500_INST_TEX_SEM_WAIT
236 | R500_INST_RGB_WMASK_R
237 | R500_INST_RGB_WMASK_G
238 | R500_INST_RGB_WMASK_B
239 | R500_INST_ALPHA_WMASK
240 | R500_INST_RGB_CLAMP
241 | R500_INST_ALPHA_CLAMP
;
242 fp
->inst
[0].inst1
= R500_TEX_ID(0)
244 | R500_TEX_SEM_ACQUIRE
245 | R500_TEX_IGNORE_UNCOVERED
;
246 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0)
247 | R500_TEX_SRC_S_SWIZ_R
248 | R500_TEX_SRC_T_SWIZ_G
249 | R500_TEX_DST_ADDR(0)
250 | R500_TEX_DST_R_SWIZ_R
251 | R500_TEX_DST_G_SWIZ_G
252 | R500_TEX_DST_B_SWIZ_B
253 | R500_TEX_DST_A_SWIZ_A
;
254 fp
->inst
[0].inst3
= R500_DX_ADDR(0)
264 fp
->inst
[0].inst4
= 0x0;
265 fp
->inst
[0].inst5
= 0x0;
267 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
268 R500_INST_TEX_SEM_WAIT
|
270 R500_INST_RGB_OMASK_R
|
271 R500_INST_RGB_OMASK_G
|
272 R500_INST_RGB_OMASK_B
|
273 R500_INST_ALPHA_OMASK
;
274 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
276 R500_RGB_ADDR1_CONST
|
278 R500_RGB_ADDR2_CONST
|
279 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
280 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
281 R500_ALPHA_ADDR1(0) |
282 R500_ALPHA_ADDR1_CONST
|
283 R500_ALPHA_ADDR2(0) |
284 R500_ALPHA_ADDR2_CONST
|
285 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
286 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
287 R500_ALU_RGB_R_SWIZ_A_R
|
288 R500_ALU_RGB_G_SWIZ_A_G
|
289 R500_ALU_RGB_B_SWIZ_A_B
|
290 R500_ALU_RGB_SEL_B_SRC0
|
291 R500_ALU_RGB_R_SWIZ_B_1
|
292 R500_ALU_RGB_B_SWIZ_B_1
|
293 R500_ALU_RGB_G_SWIZ_B_1
;
294 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
295 R500_ALPHA_SWIZ_A_A
|
297 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
298 R500_ALU_RGBA_R_SWIZ_0
|
299 R500_ALU_RGBA_G_SWIZ_0
|
300 R500_ALU_RGBA_B_SWIZ_0
|
301 R500_ALU_RGBA_A_SWIZ_0
;
304 fp
->translated
= GL_TRUE
;
/*
 * Placeholder for ALU instruction emission.
 *
 * The function has an empty body and is not called anywhere in this file;
 * the ALU encodings are currently written inline by parse_program().
 */
static void emit_alu(struct r500_fragment_program *fp)
{
}
/*
 * parse_program: translate the Mesa instructions of fp->mesa_program into
 * R500 instruction words in fp->inst[], one slot (index `counter`) per
 * Mesa instruction.
 *
 * Visible behaviour:
 *   - an absent instruction array, or one that starts with OPCODE_END, is
 *     reported as "The program is empty!";
 *   - for every instruction up to OPCODE_END, except OPCODE_KIL, the
 *     destination register (make_dest) and the write mask
 *     (WriteMask << 11) are computed first;
 *   - a switch on the opcode then fills inst0..inst5 of the current slot
 *     from the source registers (make_src, make_rgb_swizzle,
 *     make_alpha_swizzle), or hands texture opcodes to emit_tex(); an
 *     unrecognised opcode is reported through ERROR;
 *   - SATURATE_ZERO_ONE adds the RGB/alpha clamp flags, and a
 *     PROGRAM_OUTPUT destination adds the OUT type and all four output
 *     mask bits;
 *   - fp->cs->nrslots is set to `counter` after the loop.
 *
 * r500TranslateFragmentShader() compares the result against GL_FALSE.
 *
 * NOTE(review): this listing has lost lines: every `case OPCODE_...:`
 * label, the `| mask;` style tails of several inst0 assignments, the
 * break statements, the update of `counter` and the return statements are
 * not shown.  Going by the in-line comments and the hardware opcodes, the
 * cases appear to be ABS, ADD, DP3, DP4, MAD, MAX, MIN, MOV, MUL, SUB,
 * TEX and TXP in that order -- confirm against the full file.
 * NOTE(review): `temp` and `flags` are declared but not used in any
 * visible line.
 */
310 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
312 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
313 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
314 struct prog_instruction
*fpi
;
315 GLuint src
[3], dest
, temp
[2];
316 int flags
, mask
, counter
= 0;
318 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
319 ERROR("The program is empty!\n");
323 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
325 if (fpi
->Opcode
!= OPCODE_KIL
) {
326 dest
= make_dest(fp
, fpi
->DstReg
);
327 mask
= fpi
->DstReg
.WriteMask
<< 11;
330 switch (fpi
->Opcode
) {
332 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
333 /* Variation on MOV */
334 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
336 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
337 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
338 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
339 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
340 | R500_ALU_RGB_MOD_A_ABS
| R500_ALU_RGB_SEL_B_SRC0
341 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
342 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
343 | R500_ALPHA_ADDRD(dest
)
344 | R500_ALPHA_SEL_A_SRC0
345 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0])) | R500_ALPHA_MOD_A_ABS
346 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
347 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
348 | R500_ALU_RGBA_ADDRD(dest
);
351 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
352 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
353 /* Variation on MAD: 1*src0+src1 */
354 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
356 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
357 | R500_RGB_ADDR1(src
[1]);
358 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
359 | R500_ALPHA_ADDR1(src
[1]);
360 fp
->inst
[counter
].inst3
= /* 1 */
361 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
362 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
363 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
364 | R500_ALPHA_ADDRD(dest
)
365 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
366 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
367 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
368 | R500_ALU_RGBA_ADDRD(dest
)
369 | R500_ALU_RGBA_SEL_C_SRC1
370 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
371 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
372 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
375 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
376 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
377 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
378 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
380 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
381 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
382 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
383 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
384 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
385 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
386 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
387 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
388 | R500_ALPHA_ADDRD(dest
)
389 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
390 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
391 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP3
392 | R500_ALU_RGBA_ADDRD(dest
)
393 | R500_ALU_RGBA_SEL_C_SRC2
394 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
395 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
396 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
399 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
400 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
401 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
403 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
405 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
406 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
407 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
408 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
409 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
410 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
411 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
412 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_DP
413 | R500_ALPHA_ADDRD(dest
)
414 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
415 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
416 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_DP4
417 | R500_ALU_RGBA_ADDRD(dest
)
418 | R500_ALU_RGBA_SEL_C_SRC2
419 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
420 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
421 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
424 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
425 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
426 src
[2] = make_src(fp
, fpi
->SrcReg
[2]);
427 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
429 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
430 | R500_RGB_ADDR1(src
[1]) | R500_RGB_ADDR2(src
[2]);
431 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
432 | R500_ALPHA_ADDR1(src
[1]) | R500_ALPHA_ADDR2(src
[2]);
433 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
434 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
435 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
436 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
437 | R500_ALPHA_ADDRD(dest
)
438 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
439 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
440 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
441 | R500_ALU_RGBA_ADDRD(dest
)
442 | R500_ALU_RGBA_SEL_C_SRC2
443 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
444 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
445 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
448 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
/* NOTE(review): src[1] is built from SrcReg[0] here, although the B
 * swizzles below are taken from SrcReg[1]; this looks like it should be
 * SrcReg[1] (MAX opcode block). */
449 src
[1] = make_src(fp
, fpi
->SrcReg
[0]);
450 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| mask
;
451 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
452 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
453 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
454 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
455 | R500_ALU_RGB_SEL_B_SRC1
456 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
457 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
458 | R500_ALPHA_ADDRD(dest
)
459 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
460 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
461 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
462 | R500_ALU_RGBA_ADDRD(dest
);
465 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
/* NOTE(review): same pattern as the MAX block above -- src[1] is built
 * from SrcReg[0] while the B swizzles use SrcReg[1]; likely should be
 * SrcReg[1] (MIN opcode block). */
466 src
[1] = make_src(fp
, fpi
->SrcReg
[0]);
467 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
| mask
;
468 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]) | R500_RGB_ADDR1(src
[1]);
469 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]) | R500_ALPHA_ADDR1(src
[1]);
470 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
471 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
472 | R500_ALU_RGB_SEL_B_SRC1
473 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
474 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MIN
475 | R500_ALPHA_ADDRD(dest
)
476 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
477 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
478 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MIN
479 | R500_ALU_RGBA_ADDRD(dest
);
482 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
484 /* changed to use MAD - not sure if we
485 ever have negative things which max will fail on */
486 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
488 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
489 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
490 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
491 | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB
)
492 | R500_ALU_RGB_SEL_B_SRC0
493 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE
);
494 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
495 | R500_ALPHA_ADDRD(dest
)
496 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SEL_B_SRC0
497 | R500_ALPHA_SWIZ_A_A
| R500_ALPHA_SWIZ_B_1
;
499 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
500 | R500_ALU_RGBA_ADDRD(dest
)
501 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
502 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
505 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
506 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
507 /* Variation on MAD: src0*src1+0 */
508 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
510 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
511 | R500_RGB_ADDR1(src
[1]);
512 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
513 | R500_ALPHA_ADDR1(src
[1]);
514 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
515 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
516 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
517 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
518 | R500_ALPHA_ADDRD(dest
)
519 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
520 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
521 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
522 | R500_ALU_RGBA_ADDRD(dest
)
523 // | R500_ALU_RGBA_SEL_C_SRC2
524 | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO
)
525 // | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
526 | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO
);
529 src
[0] = make_src(fp
, fpi
->SrcReg
[0]);
530 src
[1] = make_src(fp
, fpi
->SrcReg
[1]);
531 /* Variation on MAD: 1*src0-src1 */
532 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
534 fp
->inst
[counter
].inst1
= R500_RGB_ADDR1(src
[0])
535 | R500_RGB_ADDR2(src
[1]);
536 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR1(src
[0])
537 | R500_ALPHA_ADDR2(src
[1]);
538 fp
->inst
[counter
].inst3
= /* 1 */
539 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
540 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
541 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
542 | R500_ALPHA_ADDRD(dest
)
543 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE
)
544 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
545 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
546 | R500_ALU_RGBA_ADDRD(dest
)
547 | R500_ALU_RGBA_SEL_C_SRC2
548 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
549 | R500_ALU_RGBA_MOD_C_NEG
550 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
551 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]))
552 | R500_ALU_RGBA_ALPHA_MOD_C_NEG
;
555 emit_tex(fp
, fpi
, OPCODE_TEX
, dest
, counter
);
558 emit_tex(fp
, fpi
, OPCODE_TXP
, dest
, counter
);
561 ERROR("unknown fpi->Opcode %d\n", fpi
->Opcode
);
565 /* Finishing touches */
566 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
567 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
569 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
570 fp
->inst
[counter
].inst0
|= R500_INST_TYPE_OUT
571 | R500_INST_RGB_OMASK_R
| R500_INST_RGB_OMASK_G
572 | R500_INST_RGB_OMASK_B
| R500_INST_ALPHA_OMASK
;
582 fp
->cs
->nrslots
= counter
;
584 /* Finish him! (If it's an output instruction...)
585 * Yes, I know it's ugly... */
/* NOTE(review): the test below is true whenever the low two bits of inst0
 * differ from 0x2.  The update of `counter` is not visible in this
 * listing; if it is advanced after each instruction (nrslots = counter
 * above suggests a count), inst[counter] is the slot after the last one
 * written rather than the last instruction itself -- verify. */
586 if ((fp
->inst
[counter
].inst0
& 0x3) ^ 0x2) {
587 fp
->inst
[counter
].inst0
|= R500_INST_TYPE_OUT
588 | R500_INST_TEX_SEM_WAIT
| R500_INST_LAST
;
/*
 * init_program: reset the per-compile state before fp is translated.
 *
 * Visible steps:
 *   - the "fp_optimization" driver option is queried from
 *     r300->radeon.optionCache (the assignment target is on a line missing
 *     from this listing);
 *   - fp->translated and fp->error are cleared, fp->cs is pointed at the
 *     context's pfs_compile state and that state is zeroed;
 *   - fp->first_node_has_tex, fp->max_temp_idx (64) and node[0]'s
 *     alu_end / tex_end (-1) are initialised;
 *   - for PFS_MAX_ALU_INST slots, vsrc[0..2] and ssrc[0..2] are set to
 *     SRC_CONST;
 *   - registers are assigned to the fragment inputs named in InputsRead:
 *     texcoords first, then WPOS (which also calls insert_wpos()), then
 *     COL0 and COL1, each handled bit being cleared from InputsRead;
 *     leftover bits are reported through WARN_ONCE and mapped to
 *     register 0;
 *   - the Mesa instructions are walked up to OPCODE_END, counting
 *     references to temporaries (sources and destinations) and to inputs,
 *     and the set of temporaries seen is stored in cs->temp_in_use.
 *
 * NOTE(review): temporaries are tracked with `1 << idx` in temps_used.
 * If a temporary index can reach 31 or more (fp->max_temp_idx is set to
 * 64 above) that shift is out of range for an int -- verify the index
 * range.
 * NOTE(review): this listing has lost lines, including local declarations
 * (i, j, idx), closing braces, the case label for the input-register
 * refcount and the terminators of the "Work out what temps" and
 * "Pre-parse the mesa program" block comments.  No comments are added
 * further down for that reason.
 */
594 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
596 struct r300_pfs_compile_state
*cs
= NULL
;
597 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
598 struct prog_instruction
*fpi
;
599 GLuint InputsRead
= mp
->Base
.InputsRead
;
600 GLuint temps_used
= 0; /* for fp->temps[] */
603 /* New compile, reset tracking data */
605 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
606 fp
->translated
= GL_FALSE
;
607 fp
->error
= GL_FALSE
;
608 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
610 fp
->first_node_has_tex
= 0;
612 fp
->max_temp_idx
= 64;
613 fp
->node
[0].alu_end
= -1;
614 fp
->node
[0].tex_end
= -1;
616 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
617 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
618 for (j
= 0; j
< 3; j
++) {
619 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
620 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
624 /* Work out what temps the Mesa inputs correspond to, this must match
625 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
626 * configures itself based on the fragprog's InputsRead
628 * NOTE: this depends on get_hw_temp() allocating registers in order,
629 * starting from register 0.
633 /* Texcoords come first */
634 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
635 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
636 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
637 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
641 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
643 /* fragment position treated as a texcoord */
644 if (InputsRead
& FRAG_BIT_WPOS
) {
645 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
646 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
= get_hw_temp(fp
, 0);
647 insert_wpos(&mp
->Base
);
649 InputsRead
&= ~FRAG_BIT_WPOS
;
651 /* Then primary colour */
652 if (InputsRead
& FRAG_BIT_COL0
) {
653 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
654 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
= get_hw_temp(fp
, 0);
656 InputsRead
&= ~FRAG_BIT_COL0
;
658 /* Secondary color */
659 if (InputsRead
& FRAG_BIT_COL1
) {
660 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
661 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
= get_hw_temp(fp
, 0);
663 InputsRead
&= ~FRAG_BIT_COL1
;
667 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
668 /* force read from hwreg 0 for now */
669 for (i
= 0; i
< 32; i
++)
670 if (InputsRead
& (1 << i
))
671 cs
->inputs
[i
].reg
= 0;
675 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
676 * That way, we can free up the reg when it's no longer needed
678 if (!mp
->Base
.Instructions
) {
679 ERROR("No instructions found in program\n");
683 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
686 for (i
= 0; i
< 3; i
++) {
687 idx
= fpi
->SrcReg
[i
].Index
;
688 switch (fpi
->SrcReg
[i
].File
) {
689 case PROGRAM_TEMPORARY
:
690 if (!(temps_used
& (1 << idx
))) {
691 cs
->temps
[idx
].reg
= -1;
692 cs
->temps
[idx
].refcount
= 1;
693 temps_used
|= (1 << idx
);
695 cs
->temps
[idx
].refcount
++;
698 cs
->inputs
[idx
].refcount
++;
705 idx
= fpi
->DstReg
.Index
;
706 if (fpi
->DstReg
.File
== PROGRAM_TEMPORARY
) {
707 if (!(temps_used
& (1 << idx
))) {
708 cs
->temps
[idx
].reg
= -1;
709 cs
->temps
[idx
].refcount
= 1;
710 temps_used
|= (1 << idx
);
712 cs
->temps
[idx
].refcount
++;
715 cs
->temp_in_use
= temps_used
;
718 static void update_params(struct r500_fragment_program
*fp
)
720 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
722 /* Ask Mesa nicely to fill in ParameterValues for us */
723 if (mp
->Base
.Parameters
)
724 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
/*
 * r500TranslateFragmentShader: compile fp's Mesa fragment program into
 * R500 instructions, unless fp->translated is already set.
 *
 * Visible steps when translation is needed:
 *   - the Mesa program is dumped to stderr (unconditional debug output);
 *   - init_program() resets the compile state and parse_program() emits
 *     the instructions; a GL_FALSE result is reported through ERROR with a
 *     message announcing the fallback to the dumb shader;
 *   - the current node's alu_end is derived from cs->nrslots and the
 *     node's alu_offset, a negative tex_end is clamped to 0, and
 *     fp->alu_end is set to cs->nrslots - 1;
 *   - fp->translated is set and r300UpdateStateParameters() is called with
 *     _NEW_PROGRAM.
 *
 * NOTE(review): `cs` is initialised to NULL and later dereferenced
 * (cs->nrslots); no assignment to it appears in the visible lines, so it
 * must be set on one of the lines missing from this listing -- confirm.
 * NOTE(review): the statements that follow the ERROR in the failure branch
 * (presumably the call to dumb_shader()) and the closing lines of the
 * function are also missing from this listing.
 */
727 void r500TranslateFragmentShader(r300ContextPtr r300
,
728 struct r500_fragment_program
*fp
)
731 struct r300_pfs_compile_state
*cs
= NULL
;
733 if (!fp
->translated
) {
735 /* I need to see what I'm working with! */
736 fprintf(stderr
, "Mesa program:\n");
737 fprintf(stderr
, "-------------\n");
738 _mesa_print_program(&fp
->mesa_program
.Base
);
741 init_program(r300
, fp
);
744 if (parse_program(fp
) == GL_FALSE
) {
745 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
751 fp
->node
[fp
->cur_node
].alu_end
=
752 cs
->nrslots
- fp
->node
[fp
->cur_node
].alu_offset
- 1;
753 if (fp
->node
[fp
->cur_node
].tex_end
< 0)
754 fp
->node
[fp
->cur_node
].tex_end
= 0;
756 fp
->alu_end
= cs
->nrslots
- 1;
757 //assert(fp->node[fp->cur_node].alu_end >= 0);
758 //assert(fp->alu_end >= 0);
760 fp
->translated
= GL_TRUE
;
761 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);