2 * Copyright (C) 2005 Ben Skeggs.
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 * \author Ben Skeggs <darktama@iinet.net.au>
36 * \author Jerome Glisse <j.glisse@gmail.com>
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
42 #include "r500_fragprog.h"
44 #include "../r300_reg.h"
46 #include "radeon_program_pair.h"
/*
 * Declaration of a local `code` that points at the R500 part of the program
 * code owned by the compiler `c` in scope.
 * NOTE(review): this is presumably the body of a helper macro whose #define
 * line is not visible in this view - confirm against the complete file.
 */
50 struct r500_fragment_program_code *code = &c->code->code.r500
/*
 * error(fmt, ...): report a compile error through rc_error() on c->Base.
 * The message is prefixed with the file and function name and a newline is
 * appended, so callers need not add either themselves.  A variable `c` must
 * be in scope at the point of use.  Relies on GNU-style named variadic
 * macro arguments (args... and ##args).
 * NOTE(review): the closing line of the do-block is not visible in this view.
 */
52 #define error(fmt, args...) do { \
53 rc_error(&c->Base, "%s::%s(): " fmt "\n", \
54 __FILE__, __FUNCTION__, ##args); \
/**
 * Working state shared by the flow-control emission helpers.
 *
 * The struct tag and field list are taken from the users of this state
 * (grow_branches() and emit_flowcontrol() access exactly these members
 * through a `struct emit_state *`).
 */
struct emit_state {
	/** Compiler used for error reporting and for its memory pool. */
	struct radeon_compiler *C;
	/** Hardware program that instructions are appended to. */
	struct r500_fragment_program_code *Code;

	/** Stack of currently open branches; CurrentBranchDepth entries are in use. */
	struct branch_info *Branches;
	/** Number of branches that are currently open. */
	unsigned int CurrentBranchDepth;
	/** Number of entries allocated for Branches. */
	unsigned int BranchesReserved;

	/** Largest value CurrentBranchDepth has reached so far. */
	unsigned int MaxBranchDepth;
};
/*
 * Map a compiler opcode to the R500_ALU_RGBA_OP_* code used for the RGB
 * half of an ALU instruction.
 *
 * c:      compiler, used by error() to report an unknown opcode.
 * opcode: RC_OPCODE_* value to translate.
 *
 * NOTE(review): the switch header, the label in front of the error() call
 * and the lines between that call and the MAD case are not visible in this
 * view.  Presumably error() sits under a default label that falls through
 * to MAD - confirm against the complete file.
 * NOTE(review): error() already prefixes the function name and appends a
 * newline, so the message text below repeats both.
 */
75 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler
*c
, rc_opcode opcode
)
78 case RC_OPCODE_CMP
: return R500_ALU_RGBA_OP_CMP
;
/* DDX and DDY use the MDH and MDV hardware opcodes. */
79 case RC_OPCODE_DDX
: return R500_ALU_RGBA_OP_MDH
;
80 case RC_OPCODE_DDY
: return R500_ALU_RGBA_OP_MDV
;
81 case RC_OPCODE_DP3
: return R500_ALU_RGBA_OP_DP3
;
82 case RC_OPCODE_DP4
: return R500_ALU_RGBA_OP_DP4
;
83 case RC_OPCODE_FRC
: return R500_ALU_RGBA_OP_FRC
;
85 error("translate_rgb_op(%d): unknown opcode\n", opcode
);
89 case RC_OPCODE_MAD
: return R500_ALU_RGBA_OP_MAD
;
90 case RC_OPCODE_MAX
: return R500_ALU_RGBA_OP_MAX
;
91 case RC_OPCODE_MIN
: return R500_ALU_RGBA_OP_MIN
;
/* REPL_ALPHA is encoded as the SOP opcode on the RGB side. */
92 case RC_OPCODE_REPL_ALPHA
: return R500_ALU_RGBA_OP_SOP
;
/*
 * Map a compiler opcode to the R500_ALPHA_OP_* code used for the alpha
 * half of an ALU instruction.
 *
 * c:      compiler, used by error() to report an unknown opcode.
 * opcode: RC_OPCODE_* value to translate.
 *
 * NOTE(review): the switch header, the label in front of the error() call
 * and the lines between that call and the MAD case are not visible in this
 * view.  Presumably error() sits under a default label that falls through
 * to MAD - confirm against the complete file.
 * NOTE(review): error() already prefixes the function name and appends a
 * newline, so the message text below repeats both.
 */
96 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler
*c
, rc_opcode opcode
)
99 case RC_OPCODE_CMP
: return R500_ALPHA_OP_CMP
;
100 case RC_OPCODE_COS
: return R500_ALPHA_OP_COS
;
101 case RC_OPCODE_DDX
: return R500_ALPHA_OP_MDH
;
102 case RC_OPCODE_DDY
: return R500_ALPHA_OP_MDV
;
/* Both dot products share the single DP opcode on the alpha side. */
103 case RC_OPCODE_DP3
: return R500_ALPHA_OP_DP
;
104 case RC_OPCODE_DP4
: return R500_ALPHA_OP_DP
;
105 case RC_OPCODE_EX2
: return R500_ALPHA_OP_EX2
;
106 case RC_OPCODE_FRC
: return R500_ALPHA_OP_FRC
;
/* LG2 is encoded with the opcode named LN2. */
107 case RC_OPCODE_LG2
: return R500_ALPHA_OP_LN2
;
109 error("translate_alpha_op(%d): unknown opcode\n", opcode
);
113 case RC_OPCODE_MAD
: return R500_ALPHA_OP_MAD
;
114 case RC_OPCODE_MAX
: return R500_ALPHA_OP_MAX
;
115 case RC_OPCODE_MIN
: return R500_ALPHA_OP_MIN
;
116 case RC_OPCODE_RCP
: return R500_ALPHA_OP_RCP
;
117 case RC_OPCODE_RSQ
: return R500_ALPHA_OP_RSQ
;
118 case RC_OPCODE_SIN
: return R500_ALPHA_OP_SIN
;
/*
 * Adjust a swizzle selector before it is packed into an instruction word
 * (see the callers translate_arg_rgb() and translate_arg_alpha()).
 *
 * The visible case labels show that RC_SWIZZLE_ZERO and RC_SWIZZLE_UNUSED
 * are handled together and RC_SWIZZLE_HALF separately.
 * NOTE(review): the values these selectors are mapped to, any further
 * cases and the return statement are not visible in this view.
 */
122 static unsigned int fix_hw_swizzle(unsigned int swz
)
125 case RC_SWIZZLE_ZERO
:
126 case RC_SWIZZLE_UNUSED
:
129 case RC_SWIZZLE_HALF
:
140 static unsigned int translate_arg_rgb(struct rc_pair_instruction
*inst
, int arg
)
142 unsigned int t
= inst
->RGB
.Arg
[arg
].Source
;
144 t
|= inst
->RGB
.Arg
[arg
].Negate
<< 11;
145 t
|= inst
->RGB
.Arg
[arg
].Abs
<< 12;
147 for(comp
= 0; comp
< 3; ++comp
)
148 t
|= fix_hw_swizzle(GET_SWZ(inst
->RGB
.Arg
[arg
].Swizzle
, comp
)) << (3*comp
+ 2);
153 static unsigned int translate_arg_alpha(struct rc_pair_instruction
*inst
, int i
)
155 unsigned int t
= inst
->Alpha
.Arg
[i
].Source
;
156 t
|= fix_hw_swizzle(inst
->Alpha
.Arg
[i
].Swizzle
) << 2;
157 t
|= inst
->Alpha
.Arg
[i
].Negate
<< 5;
158 t
|= inst
->Alpha
.Arg
[i
].Abs
<< 6;
/*
 * Map a compare function to the R500_INST_ALU_RESULT_OP_* bits of an
 * instruction that writes the ALU result.
 *
 * Only EQUAL, LESS, GEQUAL and NOTEQUAL have a visible mapping; for any
 * other function an error is raised on c->Base through rc_error().
 *
 * c:    compiler used for error reporting.
 * func: RC_COMPARE_FUNC_* value to translate.
 *
 * NOTE(review): the switch header, the label in front of rc_error() and
 * the value returned after the error are not visible in this view.
 */
162 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler
* c
, rc_compare_func func
)
165 case RC_COMPARE_FUNC_EQUAL
: return R500_INST_ALU_RESULT_OP_EQ
;
166 case RC_COMPARE_FUNC_LESS
: return R500_INST_ALU_RESULT_OP_LT
;
167 case RC_COMPARE_FUNC_GEQUAL
: return R500_INST_ALU_RESULT_OP_GE
;
168 case RC_COMPARE_FUNC_NOTEQUAL
: return R500_INST_ALU_RESULT_OP_NE
;
170 rc_error(&c
->Base
, "%s: unsupported compare func %i\n", __FUNCTION__
, func
);
175 static void use_temporary(struct r500_fragment_program_code
* code
, unsigned int index
)
177 if (index
> code
->max_temp_idx
)
178 code
->max_temp_idx
= index
;
/*
 * Turn the source of a paired instruction into the value placed in a
 * source address field (see the R500_RGB_ADDRn and R500_ALPHA_ADDRn uses
 * in emit_paired()).
 *
 * A source from RC_FILE_CONSTANT yields its index with bit 0x100 set.
 * A source from RC_FILE_TEMPORARY is registered through use_temporary()
 * so that code->max_temp_idx covers it.
 *
 * NOTE(review): the value returned for temporaries and for any other
 * register file is not visible in this view.
 */
181 static unsigned int use_source(struct r500_fragment_program_code
* code
, struct radeon_pair_instruction_source src
)
183 if (src
.File
== RC_FILE_CONSTANT
) {
184 return src
.Index
| 0x100;
185 } else if (src
.File
== RC_FILE_TEMPORARY
) {
186 use_temporary(code
, src
.Index
);
195 * Emit a paired ALU instruction.
197 static void emit_paired(struct r300_fragment_program_compiler
*c
, struct rc_pair_instruction
*inst
)
201 if (code
->inst_end
>= 511) {
202 error("emit_alu: Too many instructions");
206 int ip
= ++code
->inst_end
;
208 code
->inst
[ip
].inst5
= translate_rgb_op(c
, inst
->RGB
.Opcode
);
209 code
->inst
[ip
].inst4
= translate_alpha_op(c
, inst
->Alpha
.Opcode
);
211 if (inst
->RGB
.OutputWriteMask
|| inst
->Alpha
.OutputWriteMask
|| inst
->Alpha
.DepthWriteMask
) {
212 code
->inst
[ip
].inst0
= R500_INST_TYPE_OUT
;
213 if (inst
->WriteALUResult
) {
214 error("%s: cannot write output and ALU result at the same time");
218 code
->inst
[ip
].inst0
= R500_INST_TYPE_ALU
;
220 code
->inst
[ip
].inst0
|= R500_INST_TEX_SEM_WAIT
;
222 code
->inst
[ip
].inst0
|= (inst
->RGB
.WriteMask
<< 11) | (inst
->Alpha
.WriteMask
<< 14);
223 code
->inst
[ip
].inst0
|= (inst
->RGB
.OutputWriteMask
<< 15) | (inst
->Alpha
.OutputWriteMask
<< 18);
224 if (inst
->Alpha
.DepthWriteMask
) {
225 code
->inst
[ip
].inst4
|= R500_ALPHA_W_OMASK
;
226 c
->code
->writes_depth
= 1;
229 code
->inst
[ip
].inst4
|= R500_ALPHA_ADDRD(inst
->Alpha
.DestIndex
);
230 code
->inst
[ip
].inst5
|= R500_ALU_RGBA_ADDRD(inst
->RGB
.DestIndex
);
231 use_temporary(code
, inst
->Alpha
.DestIndex
);
232 use_temporary(code
, inst
->RGB
.DestIndex
);
234 if (inst
->RGB
.Saturate
)
235 code
->inst
[ip
].inst0
|= R500_INST_RGB_CLAMP
;
236 if (inst
->Alpha
.Saturate
)
237 code
->inst
[ip
].inst0
|= R500_INST_ALPHA_CLAMP
;
239 code
->inst
[ip
].inst1
|= R500_RGB_ADDR0(use_source(code
, inst
->RGB
.Src
[0]));
240 code
->inst
[ip
].inst1
|= R500_RGB_ADDR1(use_source(code
, inst
->RGB
.Src
[1]));
241 code
->inst
[ip
].inst1
|= R500_RGB_ADDR2(use_source(code
, inst
->RGB
.Src
[2]));
243 code
->inst
[ip
].inst2
|= R500_ALPHA_ADDR0(use_source(code
, inst
->Alpha
.Src
[0]));
244 code
->inst
[ip
].inst2
|= R500_ALPHA_ADDR1(use_source(code
, inst
->Alpha
.Src
[1]));
245 code
->inst
[ip
].inst2
|= R500_ALPHA_ADDR2(use_source(code
, inst
->Alpha
.Src
[2]));
247 code
->inst
[ip
].inst3
|= translate_arg_rgb(inst
, 0) << R500_ALU_RGB_SEL_A_SHIFT
;
248 code
->inst
[ip
].inst3
|= translate_arg_rgb(inst
, 1) << R500_ALU_RGB_SEL_B_SHIFT
;
249 code
->inst
[ip
].inst5
|= translate_arg_rgb(inst
, 2) << R500_ALU_RGBA_SEL_C_SHIFT
;
251 code
->inst
[ip
].inst4
|= translate_arg_alpha(inst
, 0) << R500_ALPHA_SEL_A_SHIFT
;
252 code
->inst
[ip
].inst4
|= translate_arg_alpha(inst
, 1) << R500_ALPHA_SEL_B_SHIFT
;
253 code
->inst
[ip
].inst5
|= translate_arg_alpha(inst
, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT
;
255 code
->inst
[ip
].inst3
|= R500_ALU_RGB_TARGET(inst
->RGB
.Target
);
256 code
->inst
[ip
].inst4
|= R500_ALPHA_TARGET(inst
->Alpha
.Target
);
258 if (inst
->WriteALUResult
) {
259 code
->inst
[ip
].inst3
|= R500_ALU_RGB_WMASK
;
261 if (inst
->WriteALUResult
== RC_ALURESULT_X
)
262 code
->inst
[ip
].inst0
|= R500_INST_ALU_RESULT_SEL_RED
;
264 code
->inst
[ip
].inst0
|= R500_INST_ALU_RESULT_SEL_ALPHA
;
266 code
->inst
[ip
].inst0
|= translate_alu_result_op(c
, inst
->ALUResultCompare
);
/**
 * Convert a four-component swizzle into the packed form used for the
 * source coordinate of a TEX instruction.
 *
 * Each of the four selectors is reduced to its low two bits and stored at
 * bit position 2*i, giving an eight-bit result.
 *
 * \param swizzle swizzle in the format understood by GET_SWZ()
 * \return the packed two-bits-per-component swizzle
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int swiz = 0;
	unsigned int i;

	for (i = 0; i < 4; i++) {
		swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
	}

	return swiz;
}
280 * Emit a single TEX instruction
/*
 * Encode one texture instruction into the next slot of the R500 program.
 *
 * c:    compiler owning the program; error() reports through it.
 * inst: texture instruction to encode.
 *
 * NOTE(review): the declaration of `code`, the case labels of the switch
 * and every return statement are not visible in this view, so the meaning
 * of the int result cannot be stated from here.  The visible caller in
 * r500BuildFragmentProgramHwCode() does not use the result.
 */
282 static int emit_tex(struct r300_fragment_program_compiler
*c
, struct rc_sub_instruction
*inst
)
/* Refuse to go past the last instruction slot. */
286 if (code
->inst_end
>= 511) {
287 error("emit_tex: Too many instructions");
291 int ip
= ++code
->inst_end
;
/* inst0: instruction type, destination write mask and semaphore wait. */
293 code
->inst
[ip
].inst0
= R500_INST_TYPE_TEX
294 | (inst
->DstReg
.WriteMask
<< 11)
295 | R500_INST_TEX_SEM_WAIT
;
/* inst1: texture unit plus flags; the sampling opcode is ORed in below. */
296 code
->inst
[ip
].inst1
= R500_TEX_ID(inst
->TexSrcUnit
)
297 | R500_TEX_SEM_ACQUIRE
| R500_TEX_IGNORE_UNCOVERED
;
/* Rectangle textures are flagged as unscaled. */
299 if (inst
->TexSrcTarget
== RC_TEXTURE_RECT
)
300 code
->inst
[ip
].inst1
|= R500_TEX_UNSCALED
;
/*
 * Select the hardware texture opcode from inst->Opcode.
 * NOTE(review): the case labels are not visible here; an opcode without a
 * case is reported through error().  error() already appends a newline, so
 * the message below ends up with two.
 */
302 switch (inst
->Opcode
) {
304 code
->inst
[ip
].inst1
|= R500_TEX_INST_TEXKILL
;
307 code
->inst
[ip
].inst1
|= R500_TEX_INST_LD
;
310 code
->inst
[ip
].inst1
|= R500_TEX_INST_LODBIAS
;
313 code
->inst
[ip
].inst1
|= R500_TEX_INST_PROJ
;
316 error("emit_tex can't handle opcode %x\n", inst
->Opcode
);
/* The source is always a used temporary; the destination is not for KIL. */
319 use_temporary(code
, inst
->SrcReg
[0].Index
);
320 if (inst
->Opcode
!= RC_OPCODE_KIL
)
321 use_temporary(code
, inst
->DstReg
.Index
);
/* inst2: source address and swizzle, destination address, identity destination swizzle. */
323 code
->inst
[ip
].inst2
= R500_TEX_SRC_ADDR(inst
->SrcReg
[0].Index
)
324 | (translate_strq_swizzle(inst
->SrcReg
[0].Swizzle
) << 8)
325 | R500_TEX_DST_ADDR(inst
->DstReg
.Index
)
326 | R500_TEX_DST_R_SWIZ_R
| R500_TEX_DST_G_SWIZ_G
327 | R500_TEX_DST_B_SWIZ_B
| R500_TEX_DST_A_SWIZ_A
;
/*
 * Enlarge the branch stack of s.
 *
 * The new capacity starts out as twice s->BranchesReserved.  A new array
 * is taken from the memory pool of the compiler (s->C->Pool), the
 * s->CurrentBranchDepth entries in use are copied over, and s->Branches
 * and s->BranchesReserved are updated.  The old array is not released
 * here.
 *
 * NOTE(review): lines between the declarations and the allocation are not
 * visible in this view; presumably they deal with a reserved count of
 * zero, which doubling alone would leave at zero - confirm.
 * NOTE(review): the result of memory_pool_malloc() is used without a
 * visible check.
 */
332 static void grow_branches(struct emit_state
* s
)
334 unsigned int newreserved
= s
->BranchesReserved
* 2;
335 struct branch_info
* newbranches
;
340 newbranches
= memory_pool_malloc(&s
->C
->Pool
, newreserved
*sizeof(struct branch_info
));
341 memcpy(newbranches
, s
->Branches
, s
->CurrentBranchDepth
*sizeof(struct branch_info
));
343 s
->Branches
= newbranches
;
344 s
->BranchesReserved
= newreserved
;
/*
 * Emit one flow-control instruction (IF, ELSE or ENDIF) and maintain the
 * stack of open branches in s.
 *
 * Every call claims the next instruction slot and tags it as a
 * flow-control instruction with R500_INST_ALU_WAIT set.  For ELSE and
 * ENDIF the slot index is stored in the innermost branch entry.  The
 * inst2 and inst3 words of the IF, ELSE and ENDIF slots of a branch are
 * all written when the ENDIF arrives, because only then are the jump
 * targets known.
 *
 * s:    emit state; errors are reported through rc_error() on s->C.
 * inst: instruction whose U.I.Opcode selects the case; any opcode other
 *       than IF, ELSE and ENDIF is reported as an error.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view, among them the statement guarded by the BranchesReserved check
 * (presumably a call to grow_branches()) and the initialisation of the new
 * branch entry on IF (presumably recording the IF slot and marking Else
 * as absent with a negative value, given the test on Else further down).
 */
347 static void emit_flowcontrol(struct emit_state
* s
, struct rc_instruction
* inst
)
349 if (s
->Code
->inst_end
>= 511) {
/* NOTE(review): the message names emit_tex although this is emit_flowcontrol. */
350 rc_error(s
->C
, "emit_tex: Too many instructions");
354 unsigned int newip
= ++s
->Code
->inst_end
;
356 s
->Code
->inst
[newip
].inst0
= R500_INST_TYPE_FC
| R500_INST_ALU_WAIT
;
358 if (inst
->U
.I
.Opcode
== RC_OPCODE_IF
) {
/* At most 32 branches may be open at once. */
359 if (s
->CurrentBranchDepth
>= 32) {
360 rc_error(s
->C
, "Branch depth exceeds hardware limit");
364 if (s
->CurrentBranchDepth
>= s
->BranchesReserved
)
/* Push a new entry on the branch stack. */
367 struct branch_info
* branch
= &s
->Branches
[s
->CurrentBranchDepth
++];
/* Track the deepest nesting seen; it decides about full flow control later. */
372 if (s
->CurrentBranchDepth
> s
->MaxBranchDepth
)
373 s
->MaxBranchDepth
= s
->CurrentBranchDepth
;
375 /* actual instruction is filled in at ENDIF time */
376 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ELSE
) {
377 if (!s
->CurrentBranchDepth
) {
378 rc_error(s
->C
, "%s: got ELSE outside a branch", __FUNCTION__
);
382 struct branch_info
* branch
= &s
->Branches
[s
->CurrentBranchDepth
- 1];
383 branch
->Else
= newip
;
385 /* actual instruction is filled in at ENDIF time */
386 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_ENDIF
) {
387 if (!s
->CurrentBranchDepth
) {
/* NOTE(review): this is the ENDIF path but the message says ELSE - looks like a copy and paste slip. */
388 rc_error(s
->C
, "%s: got ELSE outside a branch", __FUNCTION__
);
392 struct branch_info
* branch
= &s
->Branches
[s
->CurrentBranchDepth
- 1];
393 branch
->Endif
= newip
;
/* Fill in the IF slot now that the jump targets are known. */
395 s
->Code
->inst
[branch
->If
].inst2
= R500_FC_OP_JUMP
396 | R500_FC_A_OP_NONE
/* no address stack */
397 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
398 | R500_FC_B_OP0_INCR
/* increment branch counter if stay */
/* With an ELSE: the IF jumps behind the ELSE slot, the ELSE jumps behind the ENDIF slot. */
401 if (branch
->Else
>= 0) {
402 /* increment branch counter also if jump */
403 s
->Code
->inst
[branch
->If
].inst2
|= R500_FC_B_OP1_INCR
;
404 s
->Code
->inst
[branch
->If
].inst3
= R500_FC_JUMP_ADDR(branch
->Else
+ 1);
406 s
->Code
->inst
[branch
->Else
].inst2
= R500_FC_OP_JUMP
407 | R500_FC_A_OP_NONE
/* no address stack */
408 | R500_FC_B_ELSE
/* all active pixels want to jump */
409 | R500_FC_B_OP0_NONE
/* no counter op if stay */
410 | R500_FC_B_OP1_DECR
/* decrement branch counter if jump */
411 | R500_FC_B_POP_CNT(1)
413 s
->Code
->inst
[branch
->Else
].inst3
= R500_FC_JUMP_ADDR(branch
->Endif
+ 1);
/* Without an ELSE: the IF jumps straight behind the ENDIF slot. */
415 /* don't touch branch counter on jump */
416 s
->Code
->inst
[branch
->If
].inst2
|= R500_FC_B_OP1_NONE
;
417 s
->Code
->inst
[branch
->If
].inst3
= R500_FC_JUMP_ADDR(branch
->Endif
+ 1);
/* The ENDIF slot itself targets the instruction right after it. */
420 s
->Code
->inst
[branch
->Endif
].inst2
= R500_FC_OP_JUMP
421 | R500_FC_A_OP_NONE
/* no address stack */
422 | R500_FC_JUMP_ANY
/* docs says set this, but I don't understand why */
423 | R500_FC_B_OP0_DECR
/* decrement branch counter if stay */
424 | R500_FC_B_OP1_NONE
/* no branch counter op if jump */
425 | R500_FC_B_POP_CNT(1)
427 s
->Code
->inst
[branch
->Endif
].inst3
= R500_FC_JUMP_ADDR(branch
->Endif
+ 1);
/* Pop the finished branch. */
429 s
->CurrentBranchDepth
--;
/* error() is not used here, so this message carries its own newline. */
431 rc_error(s
->C
, "%s: unknown opcode %i\n", __FUNCTION__
, inst
->U
.I
.Opcode
);
/*
 * Translate the instruction list of compiler->Base.Program into R500
 * hardware code stored in compiler->code->code.r500.
 *
 * The emit state and the code structure are zeroed first.  The instruction
 * list is then walked until it wraps around to its head or until
 * compiler->Base.Error is set.  Instructions of type RC_INSTRUCTION_NORMAL
 * are handed to emit_flowcontrol() when their opcode info has IsFlowControl
 * set and to emit_tex() otherwise, with RC_OPCODE_BEGIN_TEX tested as a
 * special case; emit_paired() receives the pair form (U.P), presumably for
 * every other instruction type.
 *
 * After the walk the function raises an error when 128 or more temporaries
 * are needed, appends an empty OUT instruction when the program is empty
 * or does not end in one, and enables full flow control when branches were
 * nested four or more levels deep.
 *
 * NOTE(review): a number of lines are not visible in this view, among them
 * the declaration of `s`, the loop increment, the body of the BEGIN_TEX
 * case, the else branches of the dispatch and the statements that follow
 * the error checks (presumably early returns).  The test against -1 below
 * suggests inst_end is set to -1 during initialisation - confirm.
 */
435 void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler
*compiler
)
438 struct r500_fragment_program_code
*code
= &compiler
->code
->code
.r500
;
440 memset(&s
, 0, sizeof(s
));
441 s
.C
= &compiler
->Base
;
444 memset(code
, 0, sizeof(*code
));
445 code
->max_temp_idx
= 1;
/* Walk the circular instruction list; stop early once an error was raised. */
448 for(struct rc_instruction
* inst
= compiler
->Base
.Program
.Instructions
.Next
;
449 inst
!= &compiler
->Base
.Program
.Instructions
&& !compiler
->Base
.Error
;
451 if (inst
->Type
== RC_INSTRUCTION_NORMAL
) {
452 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
454 if (opcode
->IsFlowControl
) {
455 emit_flowcontrol(&s
, inst
);
456 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_BEGIN_TEX
) {
/* The int result of emit_tex() is not used. */
459 emit_tex(compiler
, &inst
->U
.I
);
462 emit_paired(compiler
, &inst
->U
.P
);
/* Temporary indices of 128 and above are rejected. */
466 if (code
->max_temp_idx
>= 128)
467 rc_error(&compiler
->Base
, "Too many hardware temporaries used");
469 if (compiler
->Base
.Error
)
/* The program has to end with an OUT instruction; add an empty one if it does not. */
472 if (code
->inst_end
== -1 ||
473 (code
->inst
[code
->inst_end
].inst0
& R500_INST_TYPE_MASK
) != R500_INST_TYPE_OUT
) {
474 /* This may happen when dead-code elimination is disabled or
475 * when most of the fragment program logic is leading to a KIL */
476 if (code
->inst_end
>= 511) {
477 rc_error(&compiler
->Base
, "Introducing fake OUT: Too many instructions");
481 int ip
= ++code
->inst_end
;
482 code
->inst
[ip
].inst0
= R500_INST_TYPE_OUT
| R500_INST_TEX_SEM_WAIT
;
/* Deeply nested branches need full flow control and at least a max_temp_idx of 1. */
485 if (s
.MaxBranchDepth
>= 4) {
486 if (code
->max_temp_idx
< 1)
487 code
->max_temp_idx
= 1;
489 code
->us_fc_ctrl
|= R500_FC_FULL_FC_EN
;