2 * Copyright (C) 2005 Ben Skeggs.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * \author Ben Skeggs <darktama@iinet.net.au>
33 * \author Jerome Glisse <j.glisse@gmail.com>
35 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
37 * \todo Depth write, WPOS/FOGC inputs
41 * \todo Verify results of opcodes for accuracy, I've only checked them in
48 #include "shader/prog_instruction.h"
49 #include "shader/prog_parameter.h"
50 #include "shader/prog_print.h"
52 #include "r300_context.h"
53 #include "r500_fragprog.h"
55 #include "r300_state.h"
58 * Useful macros and values
// ERROR(fmt, ...): prints "<file>::<function>(): <message>" to stderr and sets
// fp->error to GL_TRUE, so a variable named `fp` must be in scope at the call site.
// COMPILE_STATE: declares a local `cs` initialised from fp->cs (also needs `fp`).
// NOTE(review): the closing lines of the ERROR macro body are not visible in this view.
60 #define ERROR(fmt, args...) do { \
61 fprintf(stderr, "%s::%s(): " fmt "\n", \
62 __FILE__, __FUNCTION__, ##args); \
63 fp->error = GL_TRUE; \
66 #define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs
68 /* "Register" flags */
69 #define REG_CONSTANT (1 << 8)
/* Swizzle selector codes used when packing R500 ALU source swizzles. */
#define R500_SWIZZLE_ZERO 4
#define R500_SWIZZLE_HALF 5
#define R500_SWIZZLE_ONE 6
/* R500_SWIZZLE_ONE replicated into three consecutive 3-bit fields. */
#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
/*
 * The MAKE_SWIZ_* helpers shift an already-packed swizzle value into place.
 * The argument is parenthesised so that expressions such as (a | b) are
 * shifted as a whole instead of only their last operand.
 */
/* Swizzles for inst3 */
#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
/* Swizzles for inst4 */
#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
/* Swizzle for inst5 */
#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
// Reads the first three 3-bit fields of src.Swizzle (i = 0..2, shift i*3,
// mask 0x7); a field equal to 5 is bumped to 6, the value this file defines
// as R500_SWIZZLE_ONE (presumably 5 is the incoming "one" selector - confirm).
// NOTE(review): the declaration of `temp`, the accumulation of the packed
// result and the return statement are not visible in this view.
86 static inline GLuint
make_rgb_swizzle(struct prog_src_register src
) {
89 /* This could be optimized, but it should be plenty fast already. */
90 for (int i
= 0; i
< 3; i
++) {
91 temp
= (src
.Swizzle
>> i
*3) & 0x7;
93 if (temp
== 5) temp
++;
// Extracts the 3-bit field at bit 12 of src.Swizzle (the fourth field, after
// the three handled by make_rgb_swizzle) and applies the same 5 -> 6 bump.
// NOTE(review): the return statement is not visible in this view; presumably
// `swiz` is returned - confirm.
99 static inline GLuint
make_alpha_swizzle(struct prog_src_register src
) {
100 GLuint swiz
= (src
.Swizzle
>> 12) & 0x7;
101 if (swiz
== 5) swiz
++;
// Builds the GLuint source-register value used by parse_program, starting
// from src.Index.
// NOTE(review): only a PROGRAM_CONSTANT case label of the switch is visible
// here; the switch expression (presumably src.File, going by the disabled
// ERROR call), the other cases and the return are not shown.
105 static GLuint
make_src(struct prog_src_register src
) {
106 GLuint reg
= src
.Index
;
109 /* Ugly hack needed to work around Mesa;
110 * fragments don't get loaded right otherwise! */
113 case PROGRAM_CONSTANT
:
117 // ERROR("Can't handle src.File %x\n", src.File);
// Builds the GLuint destination-register value used by parse_program,
// starting from dest.Index.
// NOTE(review): only a PROGRAM_CONSTANT case label of the switch is visible
// here; the switch expression (presumably dest.File, going by the disabled
// ERROR call), the other cases and the return are not shown.
123 static GLuint
make_dest(struct prog_dst_register dest
) {
124 GLuint reg
= dest
.Index
;
127 /* Eventually we may need to handle multiple
128 * rendering targets... */
130 case PROGRAM_CONSTANT
:
134 // ERROR("Can't handle dest.File %x\n", dest.File);
// Writes a fixed two-instruction program into fp->inst[] and marks the
// program as translated: slot 0 is a hard-coded texture load (TEX id 0,
// source and destination address 0), slot 1 is an output ALU instruction.
// NOTE(review): several continuation lines of these initialisers are not
// visible in this view, so the exact bit patterns cannot be confirmed here.
140 static void dumb_shader(struct r500_fragment_program
*fp
)
142 /* R500_INST_TYPE_TEX */
// NOTE(review): 0x7808 is a raw magic value; per the comment above it should
// encode a TEX-type instruction word - confirm against the register headers.
143 fp
->inst
[0].inst0
= 0x7808;
144 fp
->inst
[0].inst1
= R500_TEX_ID(0) | R500_TEX_INST_LD
| R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
145 fp
->inst
[0].inst2
= R500_TEX_SRC_ADDR(0) | R500_TEX_SRC_S_SWIZ_R
|
146 R500_TEX_SRC_T_SWIZ_G
|
147 R500_TEX_DST_ADDR(0) |
148 R500_TEX_DST_R_SWIZ_R
|
149 R500_TEX_DST_G_SWIZ_G
|
150 R500_TEX_DST_B_SWIZ_B
|
151 R500_TEX_DST_A_SWIZ_A
;
152 fp
->inst
[0].inst3
= R500_DX_ADDR(0) |
162 fp
->inst
[0].inst4
= 0x0;
163 fp
->inst
[0].inst5
= 0x0;
// Slot 1: output instruction that waits on the texture semaphore and enables
// all four output-mask channels.
165 fp
->inst
[1].inst0
= R500_INST_TYPE_OUT
|
166 R500_INST_TEX_SEM_WAIT
|
168 R500_INST_RGB_OMASK_R
|
169 R500_INST_RGB_OMASK_G
|
170 R500_INST_RGB_OMASK_B
|
171 R500_INST_ALPHA_OMASK
;
172 fp
->inst
[1].inst1
= R500_RGB_ADDR0(0) |
174 R500_RGB_ADDR1_CONST
|
176 R500_RGB_ADDR2_CONST
|
177 R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
178 fp
->inst
[1].inst2
= R500_ALPHA_ADDR0(0) |
179 R500_ALPHA_ADDR1(0) |
180 R500_ALPHA_ADDR1_CONST
|
181 R500_ALPHA_ADDR2(0) |
182 R500_ALPHA_ADDR2_CONST
|
183 R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
// RGB operands: A = src0 with an R/G/B swizzle, B = src0 with every channel
// swizzled to the constant 1.
184 fp
->inst
[1].inst3
= R500_ALU_RGB_SEL_A_SRC0
|
185 R500_ALU_RGB_R_SWIZ_A_R
|
186 R500_ALU_RGB_G_SWIZ_A_G
|
187 R500_ALU_RGB_B_SWIZ_A_B
|
188 R500_ALU_RGB_SEL_B_SRC0
|
189 R500_ALU_RGB_R_SWIZ_B_1
|
190 R500_ALU_RGB_B_SWIZ_B_1
|
191 R500_ALU_RGB_G_SWIZ_B_1
;
192 fp
->inst
[1].inst4
= R500_ALPHA_OP_MAD
|
193 R500_ALPHA_SWIZ_A_A
|
// MAD with every C channel swizzled to 0, so together with B = 1 above the
// RGB result is presumably src0 * 1 + 0 - confirm.
195 fp
->inst
[1].inst5
= R500_ALU_RGBA_OP_MAD
|
196 R500_ALU_RGBA_R_SWIZ_0
|
197 R500_ALU_RGBA_G_SWIZ_0
|
198 R500_ALU_RGBA_B_SWIZ_0
|
199 R500_ALU_RGBA_A_SWIZ_0
;
// Flag the program as translated.
202 fp
->translated
= GL_TRUE
;
// Translates the Mesa instruction list in fp->mesa_program into R500
// instruction words stored in fp->inst[counter], one opcode arm at a time,
// then records the slot count in fp->cs->nrslots.
// Reports an error through ERROR() (which sets fp->error) for an empty
// program or an unhandled opcode.
// NOTE(review): the case labels, break statements, the increment of
// `counter` and the return statements are not visible in this view.
205 static GLboolean
parse_program(struct r500_fragment_program
*fp
)
207 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
208 const struct prog_instruction
*inst
= mp
->Base
.Instructions
;
209 struct prog_instruction
*fpi
;
210 GLuint src
[3], dest
, temp
[2];
211 int flags
, mask
, counter
= 0;
// Reject a missing instruction array or a program whose first opcode is END.
213 if (!inst
|| inst
[0].Opcode
== OPCODE_END
) {
214 ERROR("The program is empty!\n");
// Walk the Mesa instructions until the OPCODE_END terminator.
218 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
// Every opcode except KIL has its destination register and write mask
// decoded up front.
// NOTE(review): no use of `mask` is visible in this view; every arm below
// sets all four WMASK bits unconditionally.
220 if (fpi
->Opcode
!= OPCODE_KIL
) {
221 dest
= make_dest(fpi
->DstReg
);
222 mask
= fpi
->DstReg
.WriteMask
;
225 switch (fpi
->Opcode
) {
// Two-operand arm, emitted as a MAD computing 1*src0+src1 (see the comment
// below); presumably the add opcode - the case label is not visible.
227 src
[0] = make_src(fpi
->SrcReg
[0]);
228 src
[1] = make_src(fpi
->SrcReg
[1]);
229 /* Variation on MAD: 1*src0+src1 */
230 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
231 | R500_INST_RGB_WMASK_R
| R500_INST_RGB_WMASK_G
232 | R500_INST_RGB_WMASK_B
| R500_INST_ALPHA_WMASK
;
233 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
234 | R500_RGB_ADDR1(src
[1]);
235 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
236 | R500_ALPHA_ADDR1(src
[1]);
237 fp
->inst
[counter
].inst3
= /* 1 */
238 MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE
)
239 | R500_ALU_RGB_SEL_B_SRC0
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[0]));
// NOTE(review): both alpha swizzles below go through MAKE_SWIZ_ALPHA_B. The
// first one sits next to SEL_A and the three-operand arm uses
// MAKE_SWIZ_ALPHA_A for operand A, so this looks like a copy/paste slip that
// leaves the A swizzle unset and ORs two values into the B field - confirm.
240 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
241 | R500_ALPHA_ADDRD(dest
)
242 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE
)
243 | R500_ALPHA_SEL_B_SRC0
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[0]));
244 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
245 | R500_ALU_RGBA_ADDRD(dest
)
246 | R500_ALU_RGBA_SEL_C_SRC1
247 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[1]))
248 | R500_ALU_RGBA_ALPHA_SEL_C_SRC1
249 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[1]));
// Three-operand arm: A and B are taken from src0/src1 and C from src2 under
// a MAD op; presumably the multiply-add opcode - the case label is not visible.
252 src
[0] = make_src(fpi
->SrcReg
[0]);
253 src
[1] = make_src(fpi
->SrcReg
[1]);
254 src
[2] = make_src(fpi
->SrcReg
[2]);
255 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
256 | R500_INST_RGB_WMASK_R
| R500_INST_RGB_WMASK_G
257 | R500_INST_RGB_WMASK_B
| R500_INST_ALPHA_WMASK
;
// NOTE(review): src[2] is computed above and operand C selects SRC2 below,
// but neither inst1 nor inst2 programs an address for src[2] (only ADDR0 and
// ADDR1 are set) - looks like a missing ADDR2 term, confirm.
258 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0])
259 | R500_RGB_ADDR1(src
[1]);
260 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0])
261 | R500_ALPHA_ADDR1(src
[1]);
262 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
263 | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi
->SrcReg
[0]))
264 | R500_ALU_RGB_SEL_B_SRC1
| MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi
->SrcReg
[1]));
265 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAD
266 | R500_ALPHA_ADDRD(dest
)
267 | R500_ALPHA_SEL_A_SRC0
| MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi
->SrcReg
[0]))
268 | R500_ALPHA_SEL_B_SRC1
| MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi
->SrcReg
[1]));
269 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAD
270 | R500_ALU_RGBA_ADDRD(dest
)
271 | R500_ALU_RGBA_SEL_C_SRC2
272 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi
->SrcReg
[2]))
273 | R500_ALU_RGBA_ALPHA_SEL_C_SRC2
274 | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi
->SrcReg
[2]));
// Single-operand arm: the same source feeds both A and B of a MAX with the
// output modifier disabled; presumably the move opcode - the case label is
// not visible.
// NOTE(review): this arm uses fixed R/G/B swizzles and no alpha swizzle, so
// the source operand's own Swizzle is not applied here - confirm intent.
277 src
[0] = make_src(fpi
->SrcReg
[0]);
278 /* We use MAX, but MIN, CND, and CMP also work.
279 * Just remember to disable the OMOD! */
280 fp
->inst
[counter
].inst0
= R500_INST_TYPE_ALU
281 | R500_INST_RGB_WMASK_R
| R500_INST_RGB_WMASK_G
282 | R500_INST_RGB_WMASK_B
| R500_INST_ALPHA_WMASK
;
283 fp
->inst
[counter
].inst1
= R500_RGB_ADDR0(src
[0]);
284 fp
->inst
[counter
].inst2
= R500_ALPHA_ADDR0(src
[0]);
285 fp
->inst
[counter
].inst3
= R500_ALU_RGB_SEL_A_SRC0
286 | R500_ALU_RGB_R_SWIZ_A_R
| R500_ALU_RGB_G_SWIZ_A_G
| R500_ALU_RGB_B_SWIZ_A_B
287 | R500_ALU_RGB_SEL_B_SRC0
288 | R500_ALU_RGB_R_SWIZ_B_R
| R500_ALU_RGB_G_SWIZ_B_G
| R500_ALU_RGB_B_SWIZ_B_B
289 | R500_ALU_RGB_OMOD_DISABLE
;
290 fp
->inst
[counter
].inst4
= R500_ALPHA_OP_MAX
291 | R500_ALPHA_ADDRD(dest
)
292 | R500_ALPHA_SEL_A_SRC0
| R500_ALPHA_SEL_B_SRC0
293 | R500_ALPHA_OMOD_DISABLE
;
294 fp
->inst
[counter
].inst5
= R500_ALU_RGBA_OP_MAX
295 | R500_ALU_RGBA_ADDRD(dest
);
// Fallback for an opcode none of the arms above handle (presumably the
// default label); ERROR() also sets fp->error.
298 ERROR("unknown fpi->Opcode %d\n", fpi
->Opcode
);
302 /* Finishing touches */
// Saturating instructions get the RGB and alpha clamp bits.
303 if (fpi
->SaturateMode
== SATURATE_ZERO_ONE
) {
304 fp
->inst
[counter
].inst0
|= R500_INST_RGB_CLAMP
| R500_INST_ALPHA_CLAMP
;
// Writes to a PROGRAM_OUTPUT register become output instructions with all
// four output-mask channels enabled.
306 if (fpi
->DstReg
.File
== PROGRAM_OUTPUT
) {
307 fp
->inst
[counter
].inst0
|= R500_INST_TYPE_OUT
308 | R500_INST_RGB_OMASK_R
| R500_INST_RGB_OMASK_G
309 | R500_INST_RGB_OMASK_B
| R500_INST_ALPHA_OMASK
;
// Publish the number of slots used to the shared compile state.
319 fp
->cs
->nrslots
= counter
;
321 /* Finish him! (If it's an output instruction...)
322 * Yes, I know it's ugly... */
// NOTE(review): (inst0 & 0x3) ^ 0x2 is non-zero whenever the low two bits
// differ from 2, and the body then ORs in R500_INST_TYPE_OUT itself - confirm
// that this matches the "if it's an output instruction" intent stated above.
323 if ((fp
->inst
[counter
].inst0
& 0x3) ^ 0x2) {
324 fp
->inst
[counter
].inst0
|= R500_INST_TYPE_OUT
325 | R500_INST_TEX_SEM_WAIT
| R500_INST_LAST
;
// Resets the per-program and shared compile state before a new translation:
// clears the translated/error flags, points fp->cs at the context's
// pfs_compile state and zeroes it, assigns hardware registers to the inputs
// named in InputsRead, and pre-counts references to temporaries and inputs.
// NOTE(review): the declarations of `i`, `j` and `idx`, several guards and
// all closing braces are not visible in this view.
331 static void init_program(r300ContextPtr r300
, struct r500_fragment_program
*fp
)
333 struct r300_pfs_compile_state
*cs
= NULL
;
334 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
335 struct prog_instruction
*fpi
;
336 GLuint InputsRead
= mp
->Base
.InputsRead
;
337 GLuint temps_used
= 0; /* for fp->temps[] */
340 /* New compile, reset tracking data */
// NOTE(review): the left-hand side receiving this "fp_optimization" option
// value is not visible in this view.
342 driQueryOptioni(&r300
->radeon
.optionCache
, "fp_optimization");
343 fp
->translated
= GL_FALSE
;
344 fp
->error
= GL_FALSE
;
// The compile state lives in the r300 context (state.pfs_compile), not in
// the program itself; both fp->cs and the local `cs` point at it.
345 fp
->cs
= cs
= &(R300_CONTEXT(fp
->ctx
)->state
.pfs_compile
);
347 fp
->first_node_has_tex
= 0;
349 fp
->max_temp_idx
= 0;
350 fp
->node
[0].alu_end
= -1;
351 fp
->node
[0].tex_end
= -1;
// Zero the whole compile state, then mark every vector/scalar source of
// every ALU slot as SRC_CONST.
353 _mesa_memset(cs
, 0, sizeof(*fp
->cs
));
354 for (i
= 0; i
< PFS_MAX_ALU_INST
; i
++) {
355 for (j
= 0; j
< 3; j
++) {
356 cs
->slot
[i
].vsrc
[j
] = SRC_CONST
;
357 cs
->slot
[i
].ssrc
[j
] = SRC_CONST
;
361 /* Work out what temps the Mesa inputs correspond to, this must match
362 * what setup_rs_unit does, which shouldn't be a problem as rs_unit
363 * configures itself based on the fragprog's InputsRead
365 * NOTE: this depends on get_hw_temp() allocating registers in order,
366 * starting from register 0.
370 /* Texcoords come first */
371 for (i
= 0; i
< fp
->ctx
->Const
.MaxTextureUnits
; i
++) {
372 if (InputsRead
& (FRAG_BIT_TEX0
<< i
)) {
373 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].refcount
= 0;
374 cs
->inputs
[FRAG_ATTRIB_TEX0
+ i
].reg
=
// Each handled input class is cleared from InputsRead so that whatever
// remains at the end can be reported as unsupported.
378 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
380 /* fragment position treated as a texcoord */
381 if (InputsRead
& FRAG_BIT_WPOS
) {
382 cs
->inputs
[FRAG_ATTRIB_WPOS
].refcount
= 0;
383 cs
->inputs
[FRAG_ATTRIB_WPOS
].reg
= get_hw_temp(fp
, 0);
384 insert_wpos(&mp
->Base
);
386 InputsRead
&= ~FRAG_BIT_WPOS
;
388 /* Then primary colour */
389 if (InputsRead
& FRAG_BIT_COL0
) {
390 cs
->inputs
[FRAG_ATTRIB_COL0
].refcount
= 0;
391 cs
->inputs
[FRAG_ATTRIB_COL0
].reg
= get_hw_temp(fp
, 0);
393 InputsRead
&= ~FRAG_BIT_COL0
;
395 /* Secondary color */
396 if (InputsRead
& FRAG_BIT_COL1
) {
397 cs
->inputs
[FRAG_ATTRIB_COL1
].refcount
= 0;
398 cs
->inputs
[FRAG_ATTRIB_COL1
].reg
= get_hw_temp(fp
, 0);
400 InputsRead
&= ~FRAG_BIT_COL1
;
// NOTE(review): presumably guarded by a check that InputsRead is still
// non-zero; the guard is not visible in this view.
404 WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead
);
405 /* force read from hwreg 0 for now */
406 for (i
= 0; i
< 32; i
++)
407 if (InputsRead
& (1 << i
))
408 cs
->inputs
[i
].reg
= 0;
412 /* Pre-parse the mesa program, grabbing refcounts on input/temp regs.
413 * That way, we can free up the reg when it's no longer needed
415 if (!mp
->Base
.Instructions
) {
416 ERROR("No instructions found in program\n");
420 for (fpi
= mp
->Base
.Instructions
; fpi
->Opcode
!= OPCODE_END
; fpi
++) {
// Source operands: the first sighting of a temporary sets reg = -1 and
// refcount = 1 and records it in the temps_used bitmask; the refcount++ that
// follows is presumably the else branch for later sightings.
// NOTE(review): temps_used is a 32-bit mask indexed with (1 << idx); an
// index of 31 or more would overflow the shift - confirm the index range.
423 for (i
= 0; i
< 3; i
++) {
424 idx
= fpi
->SrcReg
[i
].Index
;
425 switch (fpi
->SrcReg
[i
].File
) {
426 case PROGRAM_TEMPORARY
:
427 if (!(temps_used
& (1 << idx
))) {
428 cs
->temps
[idx
].reg
= -1;
429 cs
->temps
[idx
].refcount
= 1;
430 temps_used
|= (1 << idx
);
432 cs
->temps
[idx
].refcount
++;
// NOTE(review): presumably under a PROGRAM_INPUT case label, which is not
// visible in this view.
435 cs
->inputs
[idx
].refcount
++;
// Destination operand: the same first-sighting / increment bookkeeping, for
// temporaries only.
442 idx
= fpi
->DstReg
.Index
;
443 if (fpi
->DstReg
.File
== PROGRAM_TEMPORARY
) {
444 if (!(temps_used
& (1 << idx
))) {
445 cs
->temps
[idx
].reg
= -1;
446 cs
->temps
[idx
].refcount
= 1;
447 temps_used
|= (1 << idx
);
449 cs
->temps
[idx
].refcount
++;
// Publish the bitmask of temporaries seen during the pre-pass.
452 cs
->temp_in_use
= temps_used
;
// Refreshes the program's parameter values: when the Mesa program has a
// parameter list, hands it to _mesa_load_state_parameters() together with
// the program's GL context; does nothing otherwise.
455 static void update_params(struct r500_fragment_program
*fp
)
457 struct gl_fragment_program
*mp
= &fp
->mesa_program
;
459 /* Ask Mesa nicely to fill in ParameterValues for us */
460 if (mp
->Base
.Parameters
)
461 _mesa_load_state_parameters(fp
->ctx
, mp
->Base
.Parameters
);
464 void r500TranslateFragmentShader(r300ContextPtr r300
,
465 struct r500_fragment_program
*fp
)
468 struct r300_pfs_compile_state
*cs
= NULL
;
470 if (!fp
->translated
) {
472 /* I need to see what I'm working with! */
473 fprintf(stderr
, "Mesa program:\n");
474 fprintf(stderr
, "-------------\n");
475 _mesa_print_program(&fp
->mesa_program
.Base
);
478 init_program(r300
, fp
);
481 if (parse_program(fp
) == GL_FALSE
) {
482 ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n");
488 fp
->node
[fp
->cur_node
].alu_end
=
489 cs
->nrslots
- fp
->node
[fp
->cur_node
].alu_offset
- 1;
490 if (fp
->node
[fp
->cur_node
].tex_end
< 0)
491 fp
->node
[fp
->cur_node
].tex_end
= 0;
493 fp
->alu_end
= cs
->nrslots
- 1;
494 //assert(fp->node[fp->cur_node].alu_end >= 0);
495 //assert(fp->alu_end >= 0);
497 fp
->translated
= GL_TRUE
;
498 r300UpdateStateParameters(fp
->ctx
, _NEW_PROGRAM
);