2 * Copyright (C) 2005 Ben Skeggs.
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 * \author Ben Skeggs <darktama@iinet.net.au>
36 * \author Jerome Glisse <j.glisse@gmail.com>
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
42 #include "r500_fragprog.h"
46 #include "radeon_program_pair.h"
/* Local alias for the R500 program code held by the compiler at
 * c->code->code.r500.  It relies on a variable named c being in scope.
 * NOTE(review): the line that introduces this statement (it looks like
 * the body of a helper macro) is not visible in this listing. */
49 struct r500_fragment_program_code *code = &c->code->code.r500
/* error(): forwards to rc_error on c->Base, prefixing the message with
 * __FILE__ and __FUNCTION__ and appending a newline after fmt.  Callers
 * that already end fmt with a newline will therefore emit two.
 * Like the alias above it needs a variable named c at the call site. */
51 #define error(fmt, args...) do { \
52 rc_error(&c->Base, "%s::%s(): " fmt "\n", \
53 __FILE__, __FUNCTION__, ##args); \
/* Book-keeping used while emitting flow-control instructions.
 * NOTE(review): the members listed below are the ones emit_flowcontrol
 * reaches through its struct emit_state argument (s->C, s->Code,
 * s->Branches, ...), whereas the r500_loop_info members it uses
 * (BranchDepth, BgnLoop, Brks, Conts, ...) do not appear here.  The
 * listing seems to have lost the lines between two struct definitions;
 * confirm against the full file. */
63 struct r500_loop_info
{
/* Compiler handle: used for rc_error reporting and for its memory Pool. */
77 struct radeon_compiler
* C
;
/* Program code whose inst array and integer constants are filled in. */
78 struct r500_fragment_program_code
* Code
;
/* Stack of currently open branches, grown with memory_pool_array_reserve. */
80 struct branch_info
* Branches
;
/* Number of open branches; also the index of the next free Branches slot. */
81 unsigned int CurrentBranchDepth
;
/* Passed as the reserved-count argument when growing Branches. */
82 unsigned int BranchesReserved
;
/* Stack of currently open loops, grown with memory_pool_array_reserve. */
84 struct r500_loop_info
* Loops
;
/* Number of open loops; also the index of the next free Loops slot. */
85 unsigned int CurrentLoopDepth
;
/* Passed as the reserved-count argument when growing Loops; a non-zero
 * value is later taken as evidence that loops were used. */
86 unsigned int LoopsReserved
;
/* Highest CurrentBranchDepth reached; checked when deciding whether to
 * enable full flow control. */
88 unsigned int MaxBranchDepth
;
/* Map a compiler opcode to the R500 opcode value for the RGB half of an
 * ALU instruction.
 * c is needed only because the error macro reports through c->Base.
 * Note that DDX and DDY map to the MDH and MDV hardware ops, and
 * REPL_ALPHA maps to SOP. */
92 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler
*c
, rc_opcode opcode
)
95 case RC_OPCODE_CMP
: return R500_ALU_RGBA_OP_CMP
;
96 case RC_OPCODE_CND
: return R500_ALU_RGBA_OP_CND
;
97 case RC_OPCODE_DDX
: return R500_ALU_RGBA_OP_MDH
;
98 case RC_OPCODE_DDY
: return R500_ALU_RGBA_OP_MDV
;
99 case RC_OPCODE_DP3
: return R500_ALU_RGBA_OP_DP3
;
100 case RC_OPCODE_DP4
: return R500_ALU_RGBA_OP_DP4
;
101 case RC_OPCODE_FRC
: return R500_ALU_RGBA_OP_FRC
;
/* Reports an opcode that has no RGB translation.  NOTE(review): the label
 * this statement belongs to is not visible in this listing (presumably
 * the default case), and nothing visible returns before the MAD case, so
 * it looks like the error path continues into the cases below; confirm.
 * The format string ends in a newline and the error macro adds another. */
103 error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode
)->Name
);
107 case RC_OPCODE_MAD
: return R500_ALU_RGBA_OP_MAD
;
108 case RC_OPCODE_MAX
: return R500_ALU_RGBA_OP_MAX
;
109 case RC_OPCODE_MIN
: return R500_ALU_RGBA_OP_MIN
;
110 case RC_OPCODE_REPL_ALPHA
: return R500_ALU_RGBA_OP_SOP
;
/* Map a compiler opcode to the R500 opcode value for the alpha half of an
 * ALU instruction.
 * c is needed only because the error macro reports through c->Base.
 * DP3 and DP4 both map to the single R500_ALPHA_OP_DP value, DDX and DDY
 * map to MDH and MDV, and LG2 maps to the op named LN2. */
114 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler
*c
, rc_opcode opcode
)
117 case RC_OPCODE_CMP
: return R500_ALPHA_OP_CMP
;
118 case RC_OPCODE_CND
: return R500_ALPHA_OP_CND
;
119 case RC_OPCODE_COS
: return R500_ALPHA_OP_COS
;
120 case RC_OPCODE_DDX
: return R500_ALPHA_OP_MDH
;
121 case RC_OPCODE_DDY
: return R500_ALPHA_OP_MDV
;
122 case RC_OPCODE_DP3
: return R500_ALPHA_OP_DP
;
123 case RC_OPCODE_DP4
: return R500_ALPHA_OP_DP
;
124 case RC_OPCODE_EX2
: return R500_ALPHA_OP_EX2
;
125 case RC_OPCODE_FRC
: return R500_ALPHA_OP_FRC
;
126 case RC_OPCODE_LG2
: return R500_ALPHA_OP_LN2
;
/* Reports an opcode that has no alpha translation.  NOTE(review): the
 * label this statement belongs to is not visible in this listing
 * (presumably the default case), and nothing visible returns before the
 * MAD case; confirm whether the error path continues into it.
 * The format string ends in a newline and the error macro adds another. */
128 error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode
)->Name
);
132 case RC_OPCODE_MAD
: return R500_ALPHA_OP_MAD
;
133 case RC_OPCODE_MAX
: return R500_ALPHA_OP_MAX
;
134 case RC_OPCODE_MIN
: return R500_ALPHA_OP_MIN
;
135 case RC_OPCODE_RCP
: return R500_ALPHA_OP_RCP
;
136 case RC_OPCODE_RSQ
: return R500_ALPHA_OP_RSQ
;
137 case RC_OPCODE_SIN
: return R500_ALPHA_OP_SIN
;
/* Adjust a single swizzle selector before it is packed into an ALU
 * argument field (see the callers translate_arg_rgb and
 * translate_arg_alpha).
 * NOTE(review): only the case labels survive in this listing; the values
 * produced for each case and the return statement are not visible, so
 * the exact mapping must be checked against the full file. */
141 static unsigned int fix_hw_swizzle(unsigned int swz
)
/* ZERO and UNUSED are adjacent labels, so presumably they share one body. */
144 case RC_SWIZZLE_ZERO
:
145 case RC_SWIZZLE_UNUSED
:
148 case RC_SWIZZLE_HALF
:
/* Build the packed selector for RGB argument number arg of a paired
 * instruction.
 * Layout as assembled below: the Source value in the lowest bits, one
 * swizzle field per colour component starting at bit 2 with a stride of
 * 3 bits, Negate at bit 11 and Abs at bit 12.
 * The caller shifts the result into the right argument slot.
 * NOTE(review): the declaration of comp and the return of t are not
 * visible in this listing. */
159 static unsigned int translate_arg_rgb(struct rc_pair_instruction
*inst
, int arg
)
161 unsigned int t
= inst
->RGB
.Arg
[arg
].Source
;
163 t
|= inst
->RGB
.Arg
[arg
].Negate
<< 11;
164 t
|= inst
->RGB
.Arg
[arg
].Abs
<< 12;
/* Three components only: each swizzle goes through fix_hw_swizzle first. */
166 for(comp
= 0; comp
< 3; ++comp
)
167 t
|= fix_hw_swizzle(GET_SWZ(inst
->RGB
.Arg
[arg
].Swizzle
, comp
)) << (3*comp
+ 2);
/* Build the packed selector for alpha argument number i of a paired
 * instruction.
 * Layout as assembled below: the Source value in the lowest bits, the
 * hardware swizzle of component 0 at bit 2, Negate at bit 5 and Abs at
 * bit 6.  The caller shifts the result into the right argument slot.
 * NOTE(review): the return of t is not visible in this listing. */
172 static unsigned int translate_arg_alpha(struct rc_pair_instruction
*inst
, int i
)
174 unsigned int t
= inst
->Alpha
.Arg
[i
].Source
;
/* Alpha is a scalar, so only swizzle component 0 is consulted. */
175 t
|= fix_hw_swizzle(GET_SWZ(inst
->Alpha
.Arg
[i
].Swizzle
, 0)) << 2;
176 t
|= inst
->Alpha
.Arg
[i
].Negate
<< 5;
177 t
|= inst
->Alpha
.Arg
[i
].Abs
<< 6;
/* Map a compare function to the matching R500_INST_ALU_RESULT_OP_* value
 * for inst0.
 * Only EQUAL, LESS, GEQUAL and NOTEQUAL have visible translations; any
 * other value is reported through rc_error on c->Base.
 * NOTE(review): the value returned after the error is not visible in
 * this listing. */
181 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler
* c
, rc_compare_func func
)
184 case RC_COMPARE_FUNC_EQUAL
: return R500_INST_ALU_RESULT_OP_EQ
;
185 case RC_COMPARE_FUNC_LESS
: return R500_INST_ALU_RESULT_OP_LT
;
186 case RC_COMPARE_FUNC_GEQUAL
: return R500_INST_ALU_RESULT_OP_GE
;
187 case RC_COMPARE_FUNC_NOTEQUAL
: return R500_INST_ALU_RESULT_OP_NE
;
/* Unsupported compare function: report it with the function name. */
189 rc_error(&c
->Base
, "%s: unsupported compare func %i\n", __FUNCTION__
, func
);
/* Record that temporary register number index is used by raising
 * code->max_temp_idx to it when it is larger than the current maximum.
 * Smaller or equal indices leave the program code untouched. */
194 static void use_temporary(struct r500_fragment_program_code
* code
, unsigned int index
)
196 if (index
> code
->max_temp_idx
)
197 code
->max_temp_idx
= index
;
/* Compute the address field value for one source of a paired
 * instruction, depending on the register file of src.
 * NOTE(review): several lines of this function are not visible in this
 * listing, including whatever precedes the first test, the value
 * returned for temporaries and inputs, and the final fallback. */
200 static unsigned int use_source(struct r500_fragment_program_code
* code
, struct rc_pair_instruction_source src
)
203 * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
/* Constants: the index tagged with the R500_RGB_ADDR0_CONST flag. */
208 if (src
.File
== RC_FILE_CONSTANT
) {
209 return src
.Index
| R500_RGB_ADDR0_CONST
;
/* Temporaries and inputs are both tracked as used temporaries. */
210 } else if (src
.File
== RC_FILE_TEMPORARY
|| src
.File
== RC_FILE_INPUT
) {
211 use_temporary(code
, src
.Index
);
/* Inline constants: the index with bit 7 set, matching the note above. */
213 } else if (src
.File
== RC_FILE_INLINE
) {
214 return src
.Index
| (1 << 7);
/* alu_nop: set the R500_INST_NOP flag on the already emitted instruction
 * at index ip, unless the low two bits of its inst0 word identify it as a
 * TEX instruction.
 * NOTE(review): the declaration of code is not visible in this listing;
 * presumably it comes from the alias defined near the top of the file. */
221 * NOP the specified instruction if it is not a texture lookup.
223 static void alu_nop(struct r300_fragment_program_compiler
*c
, int ip
)
227 if ((code
->inst
[ip
].inst0
& 0x3) != R500_INST_TYPE_TEX
) {
228 code
->inst
[ip
].inst0
|= R500_INST_NOP
;
/* emit_paired: append one paired RGB and alpha ALU instruction to the
 * program code and fill its six instruction words inst0..inst5 from the
 * fields of inst.
 * c    compiler; provides the instruction limit, the error sink and the
 *      writes_depth flag.
 * inst paired instruction to encode.
 * NOTE(review): many structural lines (braces, else keywords, case
 * labels, local declarations, early returns) are not visible in this
 * listing; comments below only describe the statements that are. */
233 * Emit a paired ALU instruction.
235 static void emit_paired(struct r300_fragment_program_compiler
*c
, struct rc_pair_instruction
*inst
)
/* Refuse to go past the instruction limit.  NOTE(review): the message
 * names emit_alu although this function is emit_paired. */
240 if (code
->inst_end
>= c
->Base
.max_alu_insts
-1) {
241 error("emit_alu: Too many instructions");
/* Claim the next instruction slot. */
245 ip
= ++code
->inst_end
;
247 /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
248 if (inst
->RGB
.Opcode
== RC_OPCODE_DDX
|| inst
->Alpha
.Opcode
== RC_OPCODE_DDX
||
249 inst
->RGB
.Opcode
== RC_OPCODE_DDY
|| inst
->Alpha
.Opcode
== RC_OPCODE_DDY
) {
/* Opcodes: the RGB op starts inst5 and the alpha op starts inst4; all
 * later writes to these two words are ORed in. */
255 code
->inst
[ip
].inst5
= translate_rgb_op(c
, inst
->RGB
.Opcode
);
256 code
->inst
[ip
].inst4
= translate_alpha_op(c
, inst
->Alpha
.Opcode
);
/* Any colour or depth output makes this an OUT instruction, which cannot
 * also write the ALU result. */
258 if (inst
->RGB
.OutputWriteMask
|| inst
->Alpha
.OutputWriteMask
|| inst
->Alpha
.DepthWriteMask
) {
259 code
->inst
[ip
].inst0
= R500_INST_TYPE_OUT
;
260 if (inst
->WriteALUResult
) {
261 error("Cannot write output and ALU result at the same time");
/* Plain ALU instruction type; presumably the else side of the test
 * above (the else keyword is not visible in this listing). */
265 code
->inst
[ip
].inst0
= R500_INST_TYPE_ALU
;
267 code
->inst
[ip
].inst0
|= (inst
->SemWait
<< R500_INST_TEX_SEM_WAIT_SHIFT
);
/* Write masks in inst0: RGB register mask from bit 11, alpha register
 * write at bit 14, RGB output mask from bit 15, alpha output mask from
 * bit 18. */
269 code
->inst
[ip
].inst0
|= (inst
->RGB
.WriteMask
<< 11);
270 code
->inst
[ip
].inst0
|= inst
->Alpha
.WriteMask
? 1 << 14 : 0;
271 code
->inst
[ip
].inst0
|= (inst
->RGB
.OutputWriteMask
<< 15) | (inst
->Alpha
.OutputWriteMask
<< 18);
/* NOTE(review): any condition guarding this NOP flag is not visible in
 * this listing; confirm against the full file. */
273 code
->inst
[ip
].inst0
|= R500_INST_NOP
;
/* Depth writes set the W output mask and flag the whole program as
 * writing depth. */
275 if (inst
->Alpha
.DepthWriteMask
) {
276 code
->inst
[ip
].inst4
|= R500_ALPHA_W_OMASK
;
277 c
->code
->writes_depth
= 1;
/* Destination registers; both are recorded as used temporaries. */
280 code
->inst
[ip
].inst4
|= R500_ALPHA_ADDRD(inst
->Alpha
.DestIndex
);
281 code
->inst
[ip
].inst5
|= R500_ALU_RGBA_ADDRD(inst
->RGB
.DestIndex
);
282 use_temporary(code
, inst
->Alpha
.DestIndex
);
283 use_temporary(code
, inst
->RGB
.DestIndex
);
/* Saturation maps to the per-half clamp flags in inst0. */
285 if (inst
->RGB
.Saturate
)
286 code
->inst
[ip
].inst0
|= R500_INST_RGB_CLAMP
;
287 if (inst
->Alpha
.Saturate
)
288 code
->inst
[ip
].inst0
|= R500_INST_ALPHA_CLAMP
;
/* The presubtract kind is read from the Index of the dedicated presub
 * source slot.  The case labels selecting each SRCP op below are not
 * visible in this listing. */
290 /* Set the presubtract operation. */
291 switch(inst
->RGB
.Src
[RC_PAIR_PRESUB_SRC
].Index
) {
293 code
->inst
[ip
].inst1
|= R500_RGB_SRCP_OP_1_MINUS_2RGB0
;
296 code
->inst
[ip
].inst1
|= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0
;
299 code
->inst
[ip
].inst1
|= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0
;
302 code
->inst
[ip
].inst1
|= R500_RGB_SRCP_OP_1_MINUS_RGB0
;
/* Same selection for the alpha half, written to inst2. */
307 switch(inst
->Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Index
) {
309 code
->inst
[ip
].inst2
|= R500_ALPHA_SRCP_OP_1_MINUS_2A0
;
312 code
->inst
[ip
].inst2
|= R500_ALPHA_SRCP_OP_A1_MINUS_A0
;
315 code
->inst
[ip
].inst2
|= R500_ALPHA_SRCP_OP_A1_PLUS_A0
;
318 code
->inst
[ip
].inst2
|= R500_ALPHA_SRCP_OP_1_MINUS_A0
;
324 /* Set the output modifier */
325 code
->inst
[ip
].inst3
|= inst
->RGB
.Omod
<< R500_ALU_RGB_OMOD_SHIFT
;
326 code
->inst
[ip
].inst4
|= inst
->Alpha
.Omod
<< R500_ALPHA_OMOD_SHIFT
;
/* Source addresses: three RGB sources into inst1 and three alpha sources
 * into inst2, each resolved by use_source. */
328 code
->inst
[ip
].inst1
|= R500_RGB_ADDR0(use_source(code
, inst
->RGB
.Src
[0]));
329 code
->inst
[ip
].inst1
|= R500_RGB_ADDR1(use_source(code
, inst
->RGB
.Src
[1]));
330 code
->inst
[ip
].inst1
|= R500_RGB_ADDR2(use_source(code
, inst
->RGB
.Src
[2]));
332 code
->inst
[ip
].inst2
|= R500_ALPHA_ADDR0(use_source(code
, inst
->Alpha
.Src
[0]));
333 code
->inst
[ip
].inst2
|= R500_ALPHA_ADDR1(use_source(code
, inst
->Alpha
.Src
[1]));
334 code
->inst
[ip
].inst2
|= R500_ALPHA_ADDR2(use_source(code
, inst
->Alpha
.Src
[2]));
/* Argument selectors: RGB arguments A and B go to inst3 and C to inst5;
 * alpha arguments A and B go to inst4 and C to inst5. */
336 code
->inst
[ip
].inst3
|= translate_arg_rgb(inst
, 0) << R500_ALU_RGB_SEL_A_SHIFT
;
337 code
->inst
[ip
].inst3
|= translate_arg_rgb(inst
, 1) << R500_ALU_RGB_SEL_B_SHIFT
;
338 code
->inst
[ip
].inst5
|= translate_arg_rgb(inst
, 2) << R500_ALU_RGBA_SEL_C_SHIFT
;
340 code
->inst
[ip
].inst4
|= translate_arg_alpha(inst
, 0) << R500_ALPHA_SEL_A_SHIFT
;
341 code
->inst
[ip
].inst4
|= translate_arg_alpha(inst
, 1) << R500_ALPHA_SEL_B_SHIFT
;
342 code
->inst
[ip
].inst5
|= translate_arg_alpha(inst
, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT
;
/* Output targets for each half. */
344 code
->inst
[ip
].inst3
|= R500_ALU_RGB_TARGET(inst
->RGB
.Target
);
345 code
->inst
[ip
].inst4
|= R500_ALPHA_TARGET(inst
->Alpha
.Target
);
/* ALU result write: set the RGB WMASK flag, pick the red channel when
 * the result comes from X and the alpha channel otherwise (the else
 * keyword is not visible in this listing), then add the compare op. */
347 if (inst
->WriteALUResult
) {
348 code
->inst
[ip
].inst3
|= R500_ALU_RGB_WMASK
;
350 if (inst
->WriteALUResult
== RC_ALURESULT_X
)
351 code
->inst
[ip
].inst0
|= R500_INST_ALU_RESULT_SEL_RED
;
353 code
->inst
[ip
].inst0
|= R500_INST_ALU_RESULT_SEL_ALPHA
;
355 code
->inst
[ip
].inst0
|= translate_alu_result_op(c
, inst
->ALUResultCompare
);
/* Pack the four components of a swizzle into consecutive 2-bit fields:
 * only the low two bits of each component are kept, and component i
 * lands at bits 2*i and 2*i+1 of the result.
 * NOTE(review): the declaration of i and the return of swiz are not
 * visible in this listing. */
359 static unsigned int translate_strq_swizzle(unsigned int swizzle
)
361 unsigned int swiz
= 0;
363 for (i
= 0; i
< 4; i
++)
364 swiz
|= (GET_SWZ(swizzle
, i
) & 0x3) << i
*2;
/* emit_tex: append one texture instruction to the program code and fill
 * inst0..inst2 (plus inst3 for TXD) from the fields of inst.
 * c    compiler; provides the instruction limit and the error sink.
 * inst texture sub-instruction to encode.
 * NOTE(review): the function is declared to return int, but no return
 * statement is visible in this listing, so the meaning of the result
 * cannot be stated from here.  The case labels of the opcode switch are
 * likewise not visible. */
369 * Emit a single TEX instruction
371 static int emit_tex(struct r300_fragment_program_compiler
*c
, struct rc_sub_instruction
*inst
)
/* Same limit as ALU instructions: max_alu_insts is the bound used here. */
376 if (code
->inst_end
>= c
->Base
.max_alu_insts
-1) {
377 error("emit_tex: Too many instructions");
/* Claim the next instruction slot. */
381 ip
= ++code
->inst_end
;
/* inst0: TEX type, destination write mask from bit 11, semaphore wait. */
383 code
->inst
[ip
].inst0
= R500_INST_TYPE_TEX
384 | (inst
->DstReg
.WriteMask
<< 11)
385 | (inst
->TexSemWait
<< R500_INST_TEX_SEM_WAIT_SHIFT
);
/* inst1: texture unit id and semaphore acquire. */
386 code
->inst
[ip
].inst1
= R500_TEX_ID(inst
->TexSrcUnit
)
387 | (inst
->TexSemAcquire
<< R500_TEX_SEM_ACQUIRE_SHIFT
);
/* Rectangle textures are flagged with R500_TEX_UNSCALED. */
389 if (inst
->TexSrcTarget
== RC_TEXTURE_RECT
)
390 code
->inst
[ip
].inst1
|= R500_TEX_UNSCALED
;
/* Select the hardware texture operation from the opcode.  Which opcode
 * leads to which R500_TEX_INST_* value is not visible in this listing. */
392 switch (inst
->Opcode
) {
394 code
->inst
[ip
].inst1
|= R500_TEX_INST_TEXKILL
;
397 code
->inst
[ip
].inst1
|= R500_TEX_INST_LD
;
400 code
->inst
[ip
].inst1
|= R500_TEX_INST_LODBIAS
;
403 code
->inst
[ip
].inst1
|= R500_TEX_INST_PROJ
;
406 code
->inst
[ip
].inst1
|= R500_TEX_INST_DXDY
;
409 code
->inst
[ip
].inst1
|= R500_TEX_INST_LOD
;
/* Unhandled opcode.  The format string ends in a newline and the error
 * macro adds another. */
412 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst
->Opcode
)->Name
);
/* The coordinate source always counts as a used temporary; the
 * destination does too, except for KIL. */
415 use_temporary(code
, inst
->SrcReg
[0].Index
);
416 if (inst
->Opcode
!= RC_OPCODE_KIL
)
417 use_temporary(code
, inst
->DstReg
.Index
);
/* inst2: source address, source swizzle from bit 8, destination address,
 * and the four TexSwizzle components at bits 24, 26, 28 and 30. */
419 code
->inst
[ip
].inst2
= R500_TEX_SRC_ADDR(inst
->SrcReg
[0].Index
)
420 | (translate_strq_swizzle(inst
->SrcReg
[0].Swizzle
) << 8)
421 | R500_TEX_DST_ADDR(inst
->DstReg
.Index
)
422 | (GET_SWZ(inst
->TexSwizzle
, 0) << 24)
423 | (GET_SWZ(inst
->TexSwizzle
, 1) << 26)
424 | (GET_SWZ(inst
->TexSwizzle
, 2) << 28)
425 | (GET_SWZ(inst
->TexSwizzle
, 3) << 30)
/* TXD carries explicit derivatives in sources 1 and 2. */
428 if (inst
->Opcode
== RC_OPCODE_TXD
) {
429 use_temporary(code
, inst
->SrcReg
[1].Index
);
430 use_temporary(code
, inst
->SrcReg
[2].Index
);
432 /* DX and DY parameters are specified in a separate register. */
/* inst3: DX address with its swizzle from bit 8, DY address with its
 * swizzle from bit 24. */
433 code
->inst
[ip
].inst3
=
434 R500_DX_ADDR(inst
->SrcReg
[1].Index
) |
435 (translate_strq_swizzle(inst
->SrcReg
[1].Swizzle
) << 8) |
436 R500_DY_ADDR(inst
->SrcReg
[2].Index
) |
437 (translate_strq_swizzle(inst
->SrcReg
[2].Swizzle
) << 24);
/* emit_flowcontrol: append one flow-control instruction for inst and
 * maintain the loop and branch stacks kept in s.
 * Loop instructions are completed when the matching ENDLOOP is seen, and
 * the IF and ELSE instructions of a branch are completed when the
 * matching ENDIF is seen, because their jump targets are only known
 * then.
 * s    emit state (compiler, program code, loop and branch stacks).
 * inst instruction whose U.I.Opcode selects the case.
 * NOTE(review): several case labels, braces, else keywords, the
 * declaration of newip and parts of some multi-line expressions are not
 * visible in this listing; comments below describe only what is. */
443 static void emit_flowcontrol(struct emit_state
* s
, struct rc_instruction
* inst
)
/* Instruction limit check.  NOTE(review): the message names emit_tex
 * although this is emit_flowcontrol. */
447 if (s
->Code
->inst_end
>= s
->C
->max_alu_insts
-1) {
448 rc_error(s
->C
, "emit_tex: Too many instructions");
/* Claim the next instruction slot. */
452 newip
= ++s
->Code
->inst_end
;
454 /* Currently all loops use the same integer constant to initialize
455 * the loop variables. */
/* Integer constant 0 is set up once, on first use, and the constant
 * count is fixed at 1. */
456 if(!s
->Code
->int_constants
[0]) {
457 s
->Code
->int_constants
[0] = R500_FC_INT_CONST_KR(0xff);
458 s
->Code
->int_constant_count
= 1;
/* Every flow-control instruction starts as type FC with ALU_WAIT set. */
460 s
->Code
->inst
[newip
].inst0
= R500_INST_TYPE_FC
| R500_INST_ALU_WAIT
;
462 switch(inst
->U
.I
.Opcode
){
463 struct branch_info
* branch
;
464 struct r500_loop_info
* loop
;
/* BGNLOOP: push a zeroed loop record remembering the branch depth at
 * loop entry and the address of this instruction. */
465 case RC_OPCODE_BGNLOOP
:
466 memory_pool_array_reserve(&s
->C
->Pool
, struct r500_loop_info
,
467 s
->Loops
, s
->CurrentLoopDepth
, s
->LoopsReserved
, 1);
469 loop
= &s
->Loops
[s
->CurrentLoopDepth
++];
470 memset(loop
, 0, sizeof(struct r500_loop_info
));
471 loop
->BranchDepth
= s
->CurrentBranchDepth
;
472 loop
->BgnLoop
= newip
;
/* inst3 of this instruction is filled in at ENDLOOP time. */
474 s
->Code
->inst
[newip
].inst2
= R500_FC_OP_LOOP
475 | R500_FC_JUMP_FUNC(0x00)
476 | R500_FC_IGNORE_UNCOVERED
/* Loop break (the case label is not visible in this listing): remember
 * this address in the innermost loop so ENDLOOP can patch its jump
 * target, then emit a BREAKLOOP op. */
480 loop
= &s
->Loops
[s
->CurrentLoopDepth
- 1];
481 memory_pool_array_reserve(&s
->C
->Pool
, int, loop
->Brks
,
482 loop
->BrkCount
, loop
->BrkReserved
, 1);
484 loop
->Brks
[loop
->BrkCount
++] = newip
;
485 s
->Code
->inst
[newip
].inst2
= R500_FC_OP_BREAKLOOP
486 | R500_FC_JUMP_FUNC(0xff)
/* Number of branch levels opened since the loop began; the macro this
 * expression is passed to is not visible in this listing. */
489 s
->CurrentBranchDepth
- loop
->BranchDepth
)
490 | R500_FC_IGNORE_UNCOVERED
/* Loop continue (the case label is not visible in this listing): same
 * bookkeeping as the break above, using the Conts list and a CONTINUE
 * op. */
495 loop
= &s
->Loops
[s
->CurrentLoopDepth
- 1];
496 memory_pool_array_reserve(&s
->C
->Pool
, int, loop
->Conts
,
497 loop
->ContCount
, loop
->ContReserved
, 1);
498 loop
->Conts
[loop
->ContCount
++] = newip
;
499 s
->Code
->inst
[newip
].inst2
= R500_FC_OP_CONTINUE
500 | R500_FC_JUMP_FUNC(0xff)
503 s
->CurrentBranchDepth
- loop
->BranchDepth
)
504 | R500_FC_IGNORE_UNCOVERED
/* ENDLOOP: emit the loop end, then back-patch every instruction of the
 * innermost loop that was waiting for this address. */
508 case RC_OPCODE_ENDLOOP
:
510 loop
= &s
->Loops
[s
->CurrentLoopDepth
- 1];
512 s
->Code
->inst
[newip
].inst2
= R500_FC_OP_ENDLOOP
513 | R500_FC_JUMP_FUNC(0xff)
515 | R500_FC_IGNORE_UNCOVERED
517 /* The constant integer at index 0 is used by all loops. */
/* ENDLOOP jumps back to the instruction right after BGNLOOP. */
518 s
->Code
->inst
[newip
].inst3
= R500_FC_INT_ADDR(0)
519 | R500_FC_JUMP_ADDR(loop
->BgnLoop
+ 1)
522 /* Set jump address and int constant for BGNLOOP */
523 s
->Code
->inst
[loop
->BgnLoop
].inst3
= R500_FC_INT_ADDR(0)
524 | R500_FC_JUMP_ADDR(newip
)
/* Breaks jump past ENDLOOP (newip + 1).  The count is consumed by the
 * loop, walking the recorded addresses from last to first. */
527 /* Set jump address for the BRK instructions. */
528 while(loop
->BrkCount
--) {
529 s
->Code
->inst
[loop
->Brks
[loop
->BrkCount
]].inst3
=
530 R500_FC_JUMP_ADDR(newip
+ 1);
/* Continues jump to ENDLOOP itself (newip). */
533 /* Set jump address for CONT instructions. */
534 while(loop
->ContCount
--) {
535 s
->Code
->inst
[loop
->Conts
[loop
->ContCount
]].inst3
=
536 R500_FC_JUMP_ADDR(newip
);
/* Pop the finished loop. */
538 s
->CurrentLoopDepth
--;
/* Branch open (the case label is not visible in this listing,
 * presumably IF): enforce the hardware nesting limit, push a branch
 * record and track the deepest nesting seen.  The initialisation of the
 * new record is not visible either. */
542 if ( s
->CurrentBranchDepth
>= R500_PFS_MAX_BRANCH_DEPTH_FULL
) {
543 rc_error(s
->C
, "Branch depth exceeds hardware limit");
546 memory_pool_array_reserve(&s
->C
->Pool
, struct branch_info
,
547 s
->Branches
, s
->CurrentBranchDepth
, s
->BranchesReserved
, 1);
549 branch
= &s
->Branches
[s
->CurrentBranchDepth
++];
554 if (s
->CurrentBranchDepth
> s
->MaxBranchDepth
)
555 s
->MaxBranchDepth
= s
->CurrentBranchDepth
;
557 /* actual instruction is filled in at ENDIF time */
/* ELSE (the case label is not visible in this listing): only valid
 * inside an open branch; just record where the ELSE instruction lives. */
561 if (!s
->CurrentBranchDepth
) {
562 rc_error(s
->C
, "%s: got ELSE outside a branch", __FUNCTION__
);
566 branch
= &s
->Branches
[s
->CurrentBranchDepth
- 1];
567 branch
->Else
= newip
;
569 /* actual instruction is filled in at ENDIF time */
/* ENDIF: close the innermost branch and fill in the ENDIF, IF and
 * optional ELSE instructions now that all addresses are known.
 * NOTE(review): the error text below says ELSE although this is the
 * ENDIF case; it looks like a copy-paste slip in the message. */
572 case RC_OPCODE_ENDIF
:
573 if (!s
->CurrentBranchDepth
) {
574 rc_error(s
->C
, "%s: got ELSE outside a branch", __FUNCTION__
);
578 branch
= &s
->Branches
[s
->CurrentBranchDepth
- 1];
579 branch
->Endif
= newip
;
/* The ENDIF instruction itself: targets the next instruction and pops
 * one branch level. */
581 s
->Code
->inst
[branch
->Endif
].inst2
= R500_FC_OP_JUMP
582 | R500_FC_A_OP_NONE
/* no address stack */
583 | R500_FC_JUMP_ANY
/* docs say to set this, but I don't understand why */
584 | R500_FC_B_OP0_DECR
/* decrement branch counter if stay */
585 | R500_FC_B_OP1_NONE
/* no branch counter op if jump -- NOTE(review): this comment used to
 * say stay, but OP1 is the jump-side op elsewhere in this function;
 * confirm */
586 | R500_FC_B_POP_CNT(1)
588 s
->Code
->inst
[branch
->Endif
].inst3
= R500_FC_JUMP_ADDR(branch
->Endif
+ 1);
/* The IF instruction (branch->If is assigned in lines not visible in
 * this listing). */
589 s
->Code
->inst
[branch
->If
].inst2
= R500_FC_OP_JUMP
590 | R500_FC_A_OP_NONE
/* no address stack */
591 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
592 | R500_FC_B_OP0_INCR
/* increment branch counter if stay */
593 | R500_FC_IGNORE_UNCOVERED
/* With an ELSE recorded, a failing IF jumps to just after the ELSE and
 * the ELSE instruction jumps to just after the ENDIF. */
596 if (branch
->Else
>= 0) {
597 /* increment branch counter also if jump */
598 s
->Code
->inst
[branch
->If
].inst2
|= R500_FC_B_OP1_INCR
;
599 s
->Code
->inst
[branch
->If
].inst3
= R500_FC_JUMP_ADDR(branch
->Else
+ 1);
601 s
->Code
->inst
[branch
->Else
].inst2
= R500_FC_OP_JUMP
602 | R500_FC_A_OP_NONE
/* no address stack */
603 | R500_FC_B_ELSE
/* all active pixels want to jump */
604 | R500_FC_B_OP0_NONE
/* no counter op if stay */
605 | R500_FC_B_OP1_DECR
/* decrement branch counter if jump */
606 | R500_FC_B_POP_CNT(1)
608 s
->Code
->inst
[branch
->Else
].inst3
= R500_FC_JUMP_ADDR(branch
->Endif
+ 1);
/* Without an ELSE (presumably the else side of the test above; the
 * keyword is not visible in this listing) a failing IF jumps straight to
 * just after the ENDIF. */
610 /* don't touch branch counter on jump */
611 s
->Code
->inst
[branch
->If
].inst2
|= R500_FC_B_OP1_NONE
;
612 s
->Code
->inst
[branch
->If
].inst3
= R500_FC_JUMP_ADDR(branch
->Endif
+ 1);
/* Pop the finished branch. */
616 s
->CurrentBranchDepth
--;
/* Any other opcode is reported as unknown (the label is not visible in
 * this listing). */
619 rc_error(s
->C
, "%s: unknown opcode %s\n", __FUNCTION__
, rc_get_opcode_info(inst
->U
.I
.Opcode
)->Name
);
/* r500BuildFragmentProgramHwCode: translate the instruction list of the
 * compiler into R500 hardware code stored at compiler->code->code.r500.
 * c    compiler, cast below to struct r300_fragment_program_compiler.
 * user not referenced by any line visible in this listing.
 * The function resets the output code, emits every instruction through
 * emit_flowcontrol, emit_tex or emit_paired, checks the temporary
 * register budget, makes sure the program ends with an OUT instruction
 * carrying TEX_SEM_WAIT, and enables full flow control when needed.
 * NOTE(review): the declarations of s and ip, several braces and else
 * keywords, and the early exits are not visible in this listing. */
623 void r500BuildFragmentProgramHwCode(struct radeon_compiler
*c
, void *user
)
625 struct r300_fragment_program_compiler
*compiler
= (struct r300_fragment_program_compiler
*)c
;
627 struct r500_fragment_program_code
*code
= &compiler
->code
->code
.r500
;
/* Start from a zeroed emit state that points at the base compiler. */
629 memset(&s
, 0, sizeof(s
));
630 s
.C
= &compiler
->Base
;
/* Start from zeroed output code; the temporary high-water mark begins
 * at 1 rather than 0. */
633 memset(code
, 0, sizeof(*code
));
634 code
->max_temp_idx
= 1;
/* Walk the instruction list until the list head is reached again or an
 * error has been recorded. */
637 for(struct rc_instruction
* inst
= compiler
->Base
.Program
.Instructions
.Next
;
638 inst
!= &compiler
->Base
.Program
.Instructions
&& !compiler
->Base
.Error
;
/* Normal instructions are flow control, a BEGIN_TEX marker, or a texture
 * instruction; the remaining instructions go to emit_paired.  The lines
 * separating these calls (including what happens for BEGIN_TEX) are not
 * visible in this listing. */
640 if (inst
->Type
== RC_INSTRUCTION_NORMAL
) {
641 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
643 if (opcode
->IsFlowControl
) {
644 emit_flowcontrol(&s
, inst
);
645 } else if (inst
->U
.I
.Opcode
== RC_OPCODE_BEGIN_TEX
) {
648 emit_tex(compiler
, &inst
->U
.I
);
651 emit_paired(compiler
, &inst
->U
.P
);
/* The highest temporary index used must stay below max_temp_regs. */
655 if (code
->max_temp_idx
>= compiler
->Base
.max_temp_regs
)
656 rc_error(&compiler
->Base
, "Too many hardware temporaries used");
/* Error check; the statement it guards is not visible in this listing
 * (presumably an early return). */
658 if (compiler
->Base
.Error
)
/* If nothing was emitted, or the last instruction is not an OUT, append
 * an otherwise empty OUT instruction. */
661 if (code
->inst_end
== -1 ||
662 (code
->inst
[code
->inst_end
].inst0
& R500_INST_TYPE_MASK
) != R500_INST_TYPE_OUT
) {
665 /* This may happen when dead-code elimination is disabled or
666 * when most of the fragment program logic is leading to a KIL */
667 if (code
->inst_end
>= compiler
->Base
.max_alu_insts
-1) {
668 rc_error(&compiler
->Base
, "Introducing fake OUT: Too many instructions");
672 ip
= ++code
->inst_end
;
673 code
->inst
[ip
].inst0
= R500_INST_TYPE_OUT
| R500_INST_TEX_SEM_WAIT
;
676 /* Make sure TEX_SEM_WAIT is set on the last instruction */
677 code
->inst
[code
->inst_end
].inst0
|= R500_INST_TEX_SEM_WAIT
;
/* LoopsReserved is non-zero once any loop storage has been reserved,
 * which is used here as the sign that the program contains loops. */
679 /* Enable full flow control mode if we are using loops or have if
680 * statements nested at least four deep. */
681 if (s
.MaxBranchDepth
>= 4 || s
.LoopsReserved
> 0) {
/* Keep the temporary high-water mark at 1 or above in this mode. */
682 if (code
->max_temp_idx
< 1)
683 code
->max_temp_idx
= 1;
685 code
->us_fc_ctrl
|= R500_FC_FULL_FC_EN
;