2 * Copyright (C) 2008 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 * Perform temporary register allocation and attempt to pair off instructions
32 * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
33 * vs. ALU instruction scheduling.
36 #include "radeon_program_pair.h"
38 #include "radeon_context.h"
40 #include "shader/prog_print.h"
42 #define error(fmt, args...) do { \
43 _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \
44 __FILE__, __FUNCTION__, ##args); \
48 struct pair_state_instruction
{
49 GLuint IsTex
:1; /**< Is a texture instruction */
50 GLuint IsOutput
:1; /**< Is output instruction */
51 GLuint NeedRGB
:1; /**< Needs the RGB ALU */
52 GLuint NeedAlpha
:1; /**< Needs the Alpha ALU */
53 GLuint IsTranscendent
:1; /**< Is a special transcendent instruction */
56 * Number of (read and write) dependencies that must be resolved before
57 * this instruction can be scheduled.
59 GLuint NumDependencies
:5;
62 * Next instruction in the linked list of ready instructions.
64 struct pair_state_instruction
*NextReady
;
67 * Values that this instruction writes
69 struct reg_value
*Values
[4];
74 * Used to keep track of which instructions read a value.
76 struct reg_value_reader
{
77 GLuint IP
; /**< IP of the instruction that performs this access */
78 struct reg_value_reader
*Next
;
82 * Used to keep track which values are stored in each component of a
86 GLuint IP
; /**< IP of the instruction that writes this value */
87 struct reg_value
*Next
; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */
90 * Unordered linked list of instructions that read from this value.
92 struct reg_value_reader
*Readers
;
95 * Number of readers of this value. This is calculated during @ref scan_instructions
96 * and continually decremented during code emission.
97 * When this count reaches zero, the instruction that writes the @ref Next value
104 * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
105 * to the proper hardware temporary.
107 struct pair_register_translation
{
110 GLuint RefCount
:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */
113 * Notes the value that is currently contained in each component
114 * (only used for PROGRAM_TEMPORARY registers).
116 struct reg_value
*Value
[4];
121 struct gl_program
*Program
;
122 const struct radeon_pair_handler
*Handler
;
127 GLubyte NumKillInsts
;
130 * Translate Mesa registers to hardware registers
132 struct pair_register_translation Inputs
[FRAG_ATTRIB_MAX
];
133 struct pair_register_translation Temps
[MAX_PROGRAM_TEMPS
];
136 * Derived information about program instructions.
138 struct pair_state_instruction
*Instructions
;
141 GLuint RefCount
; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
145 * Linked list of instructions that can be scheduled right now,
146 * based on which ALU/TEX resources they require.
148 struct pair_state_instruction
*ReadyFullALU
;
149 struct pair_state_instruction
*ReadyRGB
;
150 struct pair_state_instruction
*ReadyAlpha
;
151 struct pair_state_instruction
*ReadyTEX
;
154 * Linked list of deferred instructions
156 struct pair_state_instruction
*DeferredInsts
;
159 * Pool of @ref reg_value structures for fast allocation.
161 struct reg_value
*ValuePool
;
162 GLuint ValuePoolUsed
;
163 struct reg_value_reader
*ReaderPool
;
164 GLuint ReaderPoolUsed
;
168 static struct pair_register_translation
*get_register(struct pair_state
*s
, GLuint file
, GLuint index
)
171 case PROGRAM_TEMPORARY
: return &s
->Temps
[index
];
172 case PROGRAM_INPUT
: return &s
->Inputs
[index
];
177 static void alloc_hw_reg(struct pair_state
*s
, GLuint file
, GLuint index
, GLuint hwindex
)
179 struct pair_register_translation
*t
= get_register(s
, file
, index
);
180 ASSERT(!s
->HwTemps
[hwindex
].RefCount
);
181 ASSERT(!t
->Allocated
);
182 s
->HwTemps
[hwindex
].RefCount
= t
->RefCount
;
184 t
->HwIndex
= hwindex
;
187 static GLuint
get_hw_reg(struct pair_state
*s
, GLuint file
, GLuint index
)
191 struct pair_register_translation
*t
= get_register(s
, file
, index
);
193 _mesa_problem(s
->Ctx
, "get_hw_reg: %i[%i]\n", file
, index
);
200 for(hwindex
= 0; hwindex
< s
->Handler
->MaxHwTemps
; ++hwindex
)
201 if (!s
->HwTemps
[hwindex
].RefCount
)
204 if (hwindex
>= s
->Handler
->MaxHwTemps
) {
205 error("Ran out of hardware temporaries");
209 alloc_hw_reg(s
, file
, index
, hwindex
);
214 static void deref_hw_reg(struct pair_state
*s
, GLuint hwindex
)
216 if (!s
->HwTemps
[hwindex
].RefCount
) {
217 error("Hwindex %i refcount error", hwindex
);
221 s
->HwTemps
[hwindex
].RefCount
--;
224 static void add_pairinst_to_list(struct pair_state_instruction
**list
, struct pair_state_instruction
*pairinst
)
226 pairinst
->NextReady
= *list
;
231 * The instruction at the given IP has become ready. Link it into the ready
234 static void instruction_ready(struct pair_state
*s
, int ip
)
236 struct pair_state_instruction
*pairinst
= s
->Instructions
+ ip
;
239 _mesa_printf("instruction_ready(%i)\n", ip
);
241 if (s
->NumKillInsts
> 0 && pairinst
->IsOutput
)
242 add_pairinst_to_list(&s
->DeferredInsts
, pairinst
);
243 else if (pairinst
->IsTex
)
244 add_pairinst_to_list(&s
->ReadyTEX
, pairinst
);
245 else if (!pairinst
->NeedAlpha
)
246 add_pairinst_to_list(&s
->ReadyRGB
, pairinst
);
247 else if (!pairinst
->NeedRGB
)
248 add_pairinst_to_list(&s
->ReadyAlpha
, pairinst
);
250 add_pairinst_to_list(&s
->ReadyFullALU
, pairinst
);
255 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
256 * and reverse the order of arguments for CMP.
258 static void final_rewrite(struct pair_state
*s
, struct prog_instruction
*inst
)
260 struct prog_src_register tmp
;
262 switch(inst
->Opcode
) {
264 inst
->SrcReg
[2] = inst
->SrcReg
[1];
265 inst
->SrcReg
[1].File
= PROGRAM_BUILTIN
;
266 inst
->SrcReg
[1].Swizzle
= SWIZZLE_1111
;
267 inst
->SrcReg
[1].Negate
= NEGATE_NONE
;
268 inst
->Opcode
= OPCODE_MAD
;
271 tmp
= inst
->SrcReg
[2];
272 inst
->SrcReg
[2] = inst
->SrcReg
[0];
273 inst
->SrcReg
[0] = tmp
;
276 /* AMD say we should use CMP.
277 * However, when we transform
280 * CMP tmp, -r0, -r0, 0;
282 * we get incorrect behaviour on R500 when r0 == 0.0.
283 * It appears that the R500 KIL hardware treats -0.0 as less
286 inst
->SrcReg
[1].File
= PROGRAM_BUILTIN
;
287 inst
->SrcReg
[1].Swizzle
= SWIZZLE_1111
;
288 inst
->SrcReg
[2].File
= PROGRAM_BUILTIN
;
289 inst
->SrcReg
[2].Swizzle
= SWIZZLE_0000
;
290 inst
->Opcode
= OPCODE_MAD
;
293 inst
->SrcReg
[2].File
= PROGRAM_BUILTIN
;
294 inst
->SrcReg
[2].Swizzle
= SWIZZLE_0000
;
295 inst
->Opcode
= OPCODE_MAD
;
305 * Classify an instruction according to which ALUs etc. it needs
307 static void classify_instruction(struct pair_state
*s
,
308 struct prog_instruction
*inst
, struct pair_state_instruction
*pairinst
)
310 pairinst
->NeedRGB
= (inst
->DstReg
.WriteMask
& WRITEMASK_XYZ
) ? 1 : 0;
311 pairinst
->NeedAlpha
= (inst
->DstReg
.WriteMask
& WRITEMASK_W
) ? 1 : 0;
313 switch(inst
->Opcode
) {
331 pairinst
->IsTranscendent
= 1;
332 pairinst
->NeedAlpha
= 1;
335 pairinst
->NeedAlpha
= 1;
338 pairinst
->NeedRGB
= 1;
348 error("Unknown opcode %d\n", inst
->Opcode
);
352 pairinst
->IsOutput
= (inst
->DstReg
.File
== PROGRAM_OUTPUT
);
357 * Count which (input, temporary) register is read and written how often,
358 * and scan the instruction stream to find dependencies.
360 static void scan_instructions(struct pair_state
*s
)
362 struct prog_instruction
*inst
;
363 struct pair_state_instruction
*pairinst
;
366 for(inst
= s
->Program
->Instructions
, pairinst
= s
->Instructions
, ip
= 0;
367 inst
->Opcode
!= OPCODE_END
;
368 ++inst
, ++pairinst
, ++ip
) {
369 final_rewrite(s
, inst
);
370 classify_instruction(s
, inst
, pairinst
);
372 int nsrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
374 for(j
= 0; j
< nsrc
; j
++) {
375 struct pair_register_translation
*t
=
376 get_register(s
, inst
->SrcReg
[j
].File
, inst
->SrcReg
[j
].Index
);
382 if (inst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
) {
384 for(i
= 0; i
< 4; ++i
) {
385 GLuint swz
= GET_SWZ(inst
->SrcReg
[j
].Swizzle
, i
);
387 continue; /* constant or NIL swizzle */
389 continue; /* this is an undefined read */
391 /* Do not add a dependency if this instruction
392 * also rewrites the value. The code below adds
393 * a dependency for the DstReg, which is a superset
394 * of the SrcReg dependency. */
395 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
&&
396 inst
->DstReg
.Index
== inst
->SrcReg
[j
].Index
&&
397 GET_BIT(inst
->DstReg
.WriteMask
, swz
))
400 struct reg_value_reader
* r
= &s
->ReaderPool
[s
->ReaderPoolUsed
++];
401 pairinst
->NumDependencies
++;
402 t
->Value
[swz
]->NumReaders
++;
404 r
->Next
= t
->Value
[swz
]->Readers
;
405 t
->Value
[swz
]->Readers
= r
;
410 int ndst
= _mesa_num_inst_dst_regs(inst
->Opcode
);
412 struct pair_register_translation
*t
=
413 get_register(s
, inst
->DstReg
.File
, inst
->DstReg
.Index
);
417 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
419 for(j
= 0; j
< 4; ++j
) {
420 if (!GET_BIT(inst
->DstReg
.WriteMask
, j
))
423 struct reg_value
* v
= &s
->ValuePool
[s
->ValuePoolUsed
++];
426 pairinst
->NumDependencies
++;
427 t
->Value
[j
]->Next
= v
;
430 pairinst
->Values
[j
] = v
;
437 _mesa_printf("scan(%i): NumDeps = %i\n", ip
, pairinst
->NumDependencies
);
439 if (!pairinst
->NumDependencies
)
440 instruction_ready(s
, ip
);
443 /* Clear the PROGRAM_TEMPORARY state */
445 for(i
= 0; i
< MAX_PROGRAM_TEMPS
; ++i
) {
446 for(j
= 0; j
< 4; ++j
)
447 s
->Temps
[i
].Value
[j
] = 0;
453 * Reserve hardware temporary registers for the program inputs.
455 * @note This allocation is performed explicitly, because the order of inputs
456 * is determined by the RS hardware.
458 static void allocate_input_registers(struct pair_state
*s
)
460 GLuint InputsRead
= s
->Program
->InputsRead
;
465 if (InputsRead
& FRAG_BIT_COL0
)
466 alloc_hw_reg(s
, PROGRAM_INPUT
, FRAG_ATTRIB_COL0
, hwindex
++);
467 InputsRead
&= ~FRAG_BIT_COL0
;
469 /* Secondary color */
470 if (InputsRead
& FRAG_BIT_COL1
)
471 alloc_hw_reg(s
, PROGRAM_INPUT
, FRAG_ATTRIB_COL1
, hwindex
++);
472 InputsRead
&= ~FRAG_BIT_COL1
;
475 for (i
= 0; i
< s
->Ctx
->Const
.MaxTextureUnits
; i
++) {
476 if (InputsRead
& (FRAG_BIT_TEX0
<< i
))
477 alloc_hw_reg(s
, PROGRAM_INPUT
, FRAG_ATTRIB_TEX0
+i
, hwindex
++);
479 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
481 /* Fogcoords treated as a texcoord */
482 if (InputsRead
& FRAG_BIT_FOGC
)
483 alloc_hw_reg(s
, PROGRAM_INPUT
, FRAG_ATTRIB_FOGC
, hwindex
++);
484 InputsRead
&= ~FRAG_BIT_FOGC
;
486 /* fragment position treated as a texcoord */
487 if (InputsRead
& FRAG_BIT_WPOS
)
488 alloc_hw_reg(s
, PROGRAM_INPUT
, FRAG_ATTRIB_WPOS
, hwindex
++);
489 InputsRead
&= ~FRAG_BIT_WPOS
;
493 error("Don't know how to handle inputs 0x%x\n", InputsRead
);
497 static void decrement_dependencies(struct pair_state
*s
, int ip
)
499 struct pair_state_instruction
*pairinst
= s
->Instructions
+ ip
;
500 ASSERT(pairinst
->NumDependencies
> 0);
501 if (!--pairinst
->NumDependencies
)
502 instruction_ready(s
, ip
);
506 * Update the dependency tracking state based on what the instruction
507 * at the given IP does.
509 static void commit_instruction(struct pair_state
*s
, int ip
)
511 struct prog_instruction
*inst
= s
->Program
->Instructions
+ ip
;
512 struct pair_state_instruction
*pairinst
= s
->Instructions
+ ip
;
515 _mesa_printf("commit_instruction(%i)\n", ip
);
517 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
518 struct pair_register_translation
*t
= &s
->Temps
[inst
->DstReg
.Index
];
519 deref_hw_reg(s
, t
->HwIndex
);
522 for(i
= 0; i
< 4; ++i
) {
523 if (!GET_BIT(inst
->DstReg
.WriteMask
, i
))
526 t
->Value
[i
] = pairinst
->Values
[i
];
527 if (t
->Value
[i
]->NumReaders
) {
528 struct reg_value_reader
*r
;
529 for(r
= pairinst
->Values
[i
]->Readers
; r
; r
= r
->Next
)
530 decrement_dependencies(s
, r
->IP
);
531 } else if (t
->Value
[i
]->Next
) {
532 /* This happens when the only reader writes
533 * the register at the same time */
534 decrement_dependencies(s
, t
->Value
[i
]->Next
->IP
);
539 int nsrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
541 for(i
= 0; i
< nsrc
; i
++) {
542 struct pair_register_translation
*t
= get_register(s
, inst
->SrcReg
[i
].File
, inst
->SrcReg
[i
].Index
);
546 deref_hw_reg(s
, get_hw_reg(s
, inst
->SrcReg
[i
].File
, inst
->SrcReg
[i
].Index
));
548 if (inst
->SrcReg
[i
].File
!= PROGRAM_TEMPORARY
)
552 for(j
= 0; j
< 4; ++j
) {
553 GLuint swz
= GET_SWZ(inst
->SrcReg
[i
].Swizzle
, j
);
559 /* Do not free a dependency if this instruction
560 * also rewrites the value. See scan_instructions. */
561 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
&&
562 inst
->DstReg
.Index
== inst
->SrcReg
[i
].Index
&&
563 GET_BIT(inst
->DstReg
.WriteMask
, swz
))
566 if (!--t
->Value
[swz
]->NumReaders
) {
567 if (t
->Value
[swz
]->Next
)
568 decrement_dependencies(s
, t
->Value
[swz
]->Next
->IP
);
576 * Emit all ready texture instructions in a single block.
578 * Emit as a single block to (hopefully) sample many textures in parallel,
579 * and to avoid hardware indirections on R300.
581 * In R500, we don't really know when the result of a texture instruction
582 * arrives. So allocate all destinations first, to make sure they do not
583 * arrive early and overwrite a texture coordinate we're going to use later
586 static void emit_all_tex(struct pair_state
*s
)
588 struct pair_state_instruction
*readytex
;
589 struct pair_state_instruction
*pairinst
;
593 // Don't let the ready list change under us!
594 readytex
= s
->ReadyTEX
;
597 // Allocate destination hardware registers in one block to avoid conflicts.
598 for(pairinst
= readytex
; pairinst
; pairinst
= pairinst
->NextReady
) {
599 int ip
= pairinst
- s
->Instructions
;
600 struct prog_instruction
*inst
= s
->Program
->Instructions
+ ip
;
601 if (inst
->Opcode
!= OPCODE_KIL
)
602 get_hw_reg(s
, inst
->DstReg
.File
, inst
->DstReg
.Index
);
606 _mesa_printf(" BEGIN_TEX\n");
608 if (s
->Handler
->BeginTexBlock
)
609 s
->Error
= s
->Error
|| !s
->Handler
->BeginTexBlock(s
->UserData
);
611 for(pairinst
= readytex
; pairinst
; pairinst
= pairinst
->NextReady
) {
612 int ip
= pairinst
- s
->Instructions
;
613 struct prog_instruction
*inst
= s
->Program
->Instructions
+ ip
;
614 commit_instruction(s
, ip
);
616 if (inst
->Opcode
== OPCODE_KIL
)
619 inst
->DstReg
.Index
= get_hw_reg(s
, inst
->DstReg
.File
, inst
->DstReg
.Index
);
621 inst
->SrcReg
[0].Index
= get_hw_reg(s
, inst
->SrcReg
[0].File
, inst
->SrcReg
[0].Index
);
625 _mesa_print_instruction(inst
);
627 s
->Error
= s
->Error
|| !s
->Handler
->EmitTex(s
->UserData
, inst
);
631 _mesa_printf(" END_TEX\n");
635 static int alloc_pair_source(struct pair_state
*s
, struct radeon_pair_instruction
*pair
,
636 struct prog_src_register src
, GLboolean rgb
, GLboolean alpha
)
639 int candidate_quality
= -1;
648 if (src
.File
== PROGRAM_TEMPORARY
|| src
.File
== PROGRAM_INPUT
) {
650 index
= get_hw_reg(s
, src
.File
, src
.Index
);
653 s
->Error
|= !s
->Handler
->EmitConst(s
->UserData
, src
.File
, src
.Index
, &index
);
656 for(i
= 0; i
< 3; ++i
) {
659 if (pair
->RGB
.Src
[i
].Used
) {
660 if (pair
->RGB
.Src
[i
].Constant
!= constant
||
661 pair
->RGB
.Src
[i
].Index
!= index
)
667 if (pair
->Alpha
.Src
[i
].Used
) {
668 if (pair
->Alpha
.Src
[i
].Constant
!= constant
||
669 pair
->Alpha
.Src
[i
].Index
!= index
)
674 if (q
> candidate_quality
) {
675 candidate_quality
= q
;
680 if (candidate
>= 0) {
682 pair
->RGB
.Src
[candidate
].Used
= 1;
683 pair
->RGB
.Src
[candidate
].Constant
= constant
;
684 pair
->RGB
.Src
[candidate
].Index
= index
;
687 pair
->Alpha
.Src
[candidate
].Used
= 1;
688 pair
->Alpha
.Src
[candidate
].Constant
= constant
;
689 pair
->Alpha
.Src
[candidate
].Index
= index
;
697 * Fill the given ALU instruction's opcodes and source operands into the given pair,
700 static GLboolean
fill_instruction_into_pair(struct pair_state
*s
, struct radeon_pair_instruction
*pair
, int ip
)
702 struct pair_state_instruction
*pairinst
= s
->Instructions
+ ip
;
703 struct prog_instruction
*inst
= s
->Program
->Instructions
+ ip
;
705 ASSERT(!pairinst
->NeedRGB
|| pair
->RGB
.Opcode
== OPCODE_NOP
);
706 ASSERT(!pairinst
->NeedAlpha
|| pair
->Alpha
.Opcode
== OPCODE_NOP
);
708 if (pairinst
->NeedRGB
) {
709 if (pairinst
->IsTranscendent
)
710 pair
->RGB
.Opcode
= OPCODE_REPL_ALPHA
;
712 pair
->RGB
.Opcode
= inst
->Opcode
;
713 if (inst
->SaturateMode
== SATURATE_ZERO_ONE
)
714 pair
->RGB
.Saturate
= 1;
716 if (pairinst
->NeedAlpha
) {
717 pair
->Alpha
.Opcode
= inst
->Opcode
;
718 if (inst
->SaturateMode
== SATURATE_ZERO_ONE
)
719 pair
->Alpha
.Saturate
= 1;
722 int nargs
= _mesa_num_inst_src_regs(inst
->Opcode
);
725 /* Special case for DDX/DDY (MDH/MDV). */
726 if (inst
->Opcode
== OPCODE_DDX
|| inst
->Opcode
== OPCODE_DDY
) {
727 if (pair
->RGB
.Src
[0].Used
|| pair
->Alpha
.Src
[0].Used
)
733 for(i
= 0; i
< nargs
; ++i
) {
735 if (pairinst
->NeedRGB
&& !pairinst
->IsTranscendent
) {
736 GLboolean srcrgb
= GL_FALSE
;
737 GLboolean srcalpha
= GL_FALSE
;
739 for(j
= 0; j
< 3; ++j
) {
740 GLuint swz
= GET_SWZ(inst
->SrcReg
[i
].Swizzle
, j
);
746 source
= alloc_pair_source(s
, pair
, inst
->SrcReg
[i
], srcrgb
, srcalpha
);
749 pair
->RGB
.Arg
[i
].Source
= source
;
750 pair
->RGB
.Arg
[i
].Swizzle
= inst
->SrcReg
[i
].Swizzle
& 0x1ff;
751 pair
->RGB
.Arg
[i
].Abs
= inst
->SrcReg
[i
].Abs
;
752 pair
->RGB
.Arg
[i
].Negate
= !!(inst
->SrcReg
[i
].Negate
& (NEGATE_X
| NEGATE_Y
| NEGATE_Z
));
754 if (pairinst
->NeedAlpha
) {
755 GLboolean srcrgb
= GL_FALSE
;
756 GLboolean srcalpha
= GL_FALSE
;
757 GLuint swz
= GET_SWZ(inst
->SrcReg
[i
].Swizzle
, pairinst
->IsTranscendent
? 0 : 3);
762 source
= alloc_pair_source(s
, pair
, inst
->SrcReg
[i
], srcrgb
, srcalpha
);
765 pair
->Alpha
.Arg
[i
].Source
= source
;
766 pair
->Alpha
.Arg
[i
].Swizzle
= swz
;
767 pair
->Alpha
.Arg
[i
].Abs
= inst
->SrcReg
[i
].Abs
;
768 pair
->Alpha
.Arg
[i
].Negate
= !!(inst
->SrcReg
[i
].Negate
& NEGATE_W
);
777 * Fill in the destination register information.
779 * This is split from filling in source registers because we want
780 * to avoid allocating hardware temporaries for destinations until
781 * we are absolutely certain that we're going to emit a certain
782 * instruction pairing.
784 static void fill_dest_into_pair(struct pair_state
*s
, struct radeon_pair_instruction
*pair
, int ip
)
786 struct pair_state_instruction
*pairinst
= s
->Instructions
+ ip
;
787 struct prog_instruction
*inst
= s
->Program
->Instructions
+ ip
;
789 if (inst
->DstReg
.File
== PROGRAM_OUTPUT
) {
790 if (inst
->DstReg
.Index
== FRAG_RESULT_COLOR
) {
791 pair
->RGB
.OutputWriteMask
|= inst
->DstReg
.WriteMask
& WRITEMASK_XYZ
;
792 pair
->Alpha
.OutputWriteMask
|= GET_BIT(inst
->DstReg
.WriteMask
, 3);
793 } else if (inst
->DstReg
.Index
== FRAG_RESULT_DEPTH
) {
794 pair
->Alpha
.DepthWriteMask
|= GET_BIT(inst
->DstReg
.WriteMask
, 3);
797 GLuint hwindex
= get_hw_reg(s
, inst
->DstReg
.File
, inst
->DstReg
.Index
);
798 if (pairinst
->NeedRGB
) {
799 pair
->RGB
.DestIndex
= hwindex
;
800 pair
->RGB
.WriteMask
|= inst
->DstReg
.WriteMask
& WRITEMASK_XYZ
;
802 if (pairinst
->NeedAlpha
) {
803 pair
->Alpha
.DestIndex
= hwindex
;
804 pair
->Alpha
.WriteMask
|= GET_BIT(inst
->DstReg
.WriteMask
, 3);
811 * Find a good ALU instruction or pair of ALU instruction and emit it.
813 * Prefer emitting full ALU instructions, so that when we reach a point
814 * where no full ALU instruction can be emitted, we have more candidates
815 * for RGB/Alpha pairing.
817 static void emit_alu(struct pair_state
*s
)
819 struct radeon_pair_instruction pair
;
821 if (s
->ReadyFullALU
|| !(s
->ReadyRGB
&& s
->ReadyAlpha
)) {
823 if (s
->ReadyFullALU
) {
824 ip
= s
->ReadyFullALU
- s
->Instructions
;
825 s
->ReadyFullALU
= s
->ReadyFullALU
->NextReady
;
826 } else if (s
->ReadyRGB
) {
827 ip
= s
->ReadyRGB
- s
->Instructions
;
828 s
->ReadyRGB
= s
->ReadyRGB
->NextReady
;
830 ip
= s
->ReadyAlpha
- s
->Instructions
;
831 s
->ReadyAlpha
= s
->ReadyAlpha
->NextReady
;
834 _mesa_bzero(&pair
, sizeof(pair
));
835 fill_instruction_into_pair(s
, &pair
, ip
);
836 fill_dest_into_pair(s
, &pair
, ip
);
837 commit_instruction(s
, ip
);
839 struct pair_state_instruction
**prgb
;
840 struct pair_state_instruction
**palpha
;
842 /* Some pairings might fail because they require too
843 * many source slots; try all possible pairings if necessary */
844 for(prgb
= &s
->ReadyRGB
; *prgb
; prgb
= &(*prgb
)->NextReady
) {
845 for(palpha
= &s
->ReadyAlpha
; *palpha
; palpha
= &(*palpha
)->NextReady
) {
846 int rgbip
= *prgb
- s
->Instructions
;
847 int alphaip
= *palpha
- s
->Instructions
;
848 _mesa_bzero(&pair
, sizeof(pair
));
849 fill_instruction_into_pair(s
, &pair
, rgbip
);
850 if (!fill_instruction_into_pair(s
, &pair
, alphaip
))
852 *prgb
= (*prgb
)->NextReady
;
853 *palpha
= (*palpha
)->NextReady
;
854 fill_dest_into_pair(s
, &pair
, rgbip
);
855 fill_dest_into_pair(s
, &pair
, alphaip
);
856 commit_instruction(s
, rgbip
);
857 commit_instruction(s
, alphaip
);
862 /* No success in pairing; just take the first RGB instruction */
863 int ip
= s
->ReadyRGB
- s
->Instructions
;
864 s
->ReadyRGB
= s
->ReadyRGB
->NextReady
;
865 _mesa_bzero(&pair
, sizeof(pair
));
866 fill_instruction_into_pair(s
, &pair
, ip
);
867 fill_dest_into_pair(s
, &pair
, ip
);
868 commit_instruction(s
, ip
);
873 radeonPrintPairInstruction(&pair
);
875 s
->Error
= s
->Error
|| !s
->Handler
->EmitPaired(s
->UserData
, &pair
);
878 static GLubyte
countKillInsts(struct gl_program
*prog
)
880 GLubyte i
, count
= 0;
882 for (i
= 0; i
< prog
->NumInstructions
; ++i
) {
883 if (prog
->Instructions
[i
].Opcode
== OPCODE_KIL
)
890 GLboolean
radeonPairProgram(GLcontext
*ctx
, struct gl_program
*program
,
891 const struct radeon_pair_handler
* handler
, void *userdata
)
895 _mesa_bzero(&s
, sizeof(s
));
899 s
.UserData
= userdata
;
900 s
.Debug
= (RADEON_DEBUG
& DEBUG_PIXEL
) ? GL_TRUE
: GL_FALSE
;
901 s
.Verbose
= GL_FALSE
&& s
.Debug
;
902 s
.NumKillInsts
= countKillInsts(program
);
904 s
.Instructions
= (struct pair_state_instruction
*)_mesa_calloc(
905 sizeof(struct pair_state_instruction
)*s
.Program
->NumInstructions
);
906 s
.ValuePool
= (struct reg_value
*)_mesa_calloc(sizeof(struct reg_value
)*s
.Program
->NumInstructions
*4);
907 s
.ReaderPool
= (struct reg_value_reader
*)_mesa_calloc(
908 sizeof(struct reg_value_reader
)*s
.Program
->NumInstructions
*12);
911 _mesa_printf("Emit paired program\n");
913 scan_instructions(&s
);
914 allocate_input_registers(&s
);
917 (s
.ReadyTEX
|| s
.ReadyRGB
|| s
.ReadyAlpha
|| s
.ReadyFullALU
)) {
921 if (!s
.NumKillInsts
) {
922 struct pair_state_instruction
*pairinst
= s
.DeferredInsts
;
924 if (!pairinst
->NeedAlpha
)
925 add_pairinst_to_list(&s
.ReadyRGB
, pairinst
);
926 else if (!pairinst
->NeedRGB
)
927 add_pairinst_to_list(&s
.ReadyAlpha
, pairinst
);
929 add_pairinst_to_list(&s
.ReadyFullALU
, pairinst
);
931 pairinst
= pairinst
->NextReady
;
933 s
.DeferredInsts
= NULL
;
936 while(s
.ReadyFullALU
|| s
.ReadyRGB
|| s
.ReadyAlpha
)
941 _mesa_printf(" END\n");
943 _mesa_free(s
.Instructions
);
944 _mesa_free(s
.ValuePool
);
945 _mesa_free(s
.ReaderPool
);
951 static void print_pair_src(int i
, struct radeon_pair_instruction_source
* src
)
953 _mesa_printf(" Src%i = %s[%i]", i
, src
->Constant
? "CNST" : "TEMP", src
->Index
);
956 static const char* opcode_string(GLuint opcode
)
958 if (opcode
== OPCODE_REPL_ALPHA
)
961 return _mesa_opcode_string(opcode
);
964 static int num_pairinst_args(GLuint opcode
)
966 if (opcode
== OPCODE_REPL_ALPHA
)
969 return _mesa_num_inst_src_regs(opcode
);
972 static char swizzle_char(GLuint swz
)
975 case SWIZZLE_X
: return 'x';
976 case SWIZZLE_Y
: return 'y';
977 case SWIZZLE_Z
: return 'z';
978 case SWIZZLE_W
: return 'w';
979 case SWIZZLE_ZERO
: return '0';
980 case SWIZZLE_ONE
: return '1';
981 case SWIZZLE_NIL
: return '_';
986 void radeonPrintPairInstruction(struct radeon_pair_instruction
*inst
)
991 _mesa_printf(" RGB: ");
992 for(i
= 0; i
< 3; ++i
) {
993 if (inst
->RGB
.Src
[i
].Used
)
994 print_pair_src(i
, inst
->RGB
.Src
+ i
);
997 _mesa_printf(" Alpha:");
998 for(i
= 0; i
< 3; ++i
) {
999 if (inst
->Alpha
.Src
[i
].Used
)
1000 print_pair_src(i
, inst
->Alpha
.Src
+ i
);
1004 _mesa_printf(" %s%s", opcode_string(inst
->RGB
.Opcode
), inst
->RGB
.Saturate
? "_SAT" : "");
1005 if (inst
->RGB
.WriteMask
)
1006 _mesa_printf(" TEMP[%i].%s%s%s", inst
->RGB
.DestIndex
,
1007 (inst
->RGB
.WriteMask
& 1) ? "x" : "",
1008 (inst
->RGB
.WriteMask
& 2) ? "y" : "",
1009 (inst
->RGB
.WriteMask
& 4) ? "z" : "");
1010 if (inst
->RGB
.OutputWriteMask
)
1011 _mesa_printf(" COLOR.%s%s%s",
1012 (inst
->RGB
.OutputWriteMask
& 1) ? "x" : "",
1013 (inst
->RGB
.OutputWriteMask
& 2) ? "y" : "",
1014 (inst
->RGB
.OutputWriteMask
& 4) ? "z" : "");
1015 nargs
= num_pairinst_args(inst
->RGB
.Opcode
);
1016 for(i
= 0; i
< nargs
; ++i
) {
1017 const char* abs
= inst
->RGB
.Arg
[i
].Abs
? "|" : "";
1018 const char* neg
= inst
->RGB
.Arg
[i
].Negate
? "-" : "";
1019 _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg
, abs
, inst
->RGB
.Arg
[i
].Source
,
1020 swizzle_char(GET_SWZ(inst
->RGB
.Arg
[i
].Swizzle
, 0)),
1021 swizzle_char(GET_SWZ(inst
->RGB
.Arg
[i
].Swizzle
, 1)),
1022 swizzle_char(GET_SWZ(inst
->RGB
.Arg
[i
].Swizzle
, 2)),
1027 _mesa_printf(" %s%s", opcode_string(inst
->Alpha
.Opcode
), inst
->Alpha
.Saturate
? "_SAT" : "");
1028 if (inst
->Alpha
.WriteMask
)
1029 _mesa_printf(" TEMP[%i].w", inst
->Alpha
.DestIndex
);
1030 if (inst
->Alpha
.OutputWriteMask
)
1031 _mesa_printf(" COLOR.w");
1032 if (inst
->Alpha
.DepthWriteMask
)
1033 _mesa_printf(" DEPTH.w");
1034 nargs
= num_pairinst_args(inst
->Alpha
.Opcode
);
1035 for(i
= 0; i
< nargs
; ++i
) {
1036 const char* abs
= inst
->Alpha
.Arg
[i
].Abs
? "|" : "";
1037 const char* neg
= inst
->Alpha
.Arg
[i
].Negate
? "-" : "";
1038 _mesa_printf(", %s%sSrc%i.%c%s", neg
, abs
, inst
->Alpha
.Arg
[i
].Source
,
1039 swizzle_char(inst
->Alpha
.Arg
[i
].Swizzle
), abs
);