/*
 * Copyright (C) 2008 Nicolai Haehnle.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/**
 * Perform temporary register allocation and attempt to pair off instructions
 * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
 * vs. ALU instruction scheduling.
 */
36 #include "radeon_program_pair.h"
38 #include "memory_pool.h"
39 #include "radeon_compiler.h"
40 #include "shader/prog_print.h"
42 #define error(fmt, args...) do { \
43 fprintf(stderr, "r300 driver problem: %s::%s(): " fmt "\n", \
44 __FILE__, __FUNCTION__, ##args); \
48 struct pair_state_instruction
{
49 struct prog_instruction Instruction
;
50 GLuint IP
; /**< Position of this instruction in original program */
52 GLuint IsTex
:1; /**< Is a texture instruction */
53 GLuint NeedRGB
:1; /**< Needs the RGB ALU */
54 GLuint NeedAlpha
:1; /**< Needs the Alpha ALU */
55 GLuint IsTranscendent
:1; /**< Is a special transcendent instruction */
58 * Number of (read and write) dependencies that must be resolved before
59 * this instruction can be scheduled.
61 GLuint NumDependencies
:5;
64 * Next instruction in the linked list of ready instructions.
66 struct pair_state_instruction
*NextReady
;
69 * Values that this instruction writes
71 struct reg_value
*Values
[4];
/**
 * Used to keep track of which instructions read a value.
 */
78 struct reg_value_reader
{
79 struct pair_state_instruction
*Reader
;
80 struct reg_value_reader
*Next
;
84 * Used to keep track which values are stored in each component of a
88 struct pair_state_instruction
*Writer
;
89 struct reg_value
*Next
; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */
92 * Unordered linked list of instructions that read from this value.
94 struct reg_value_reader
*Readers
;
97 * Number of readers of this value. This is calculated during @ref scan_instructions
98 * and continually decremented during code emission.
99 * When this count reaches zero, the instruction that writes the @ref Next value
/**
 * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
 * to the proper hardware temporary.
 */
109 struct pair_register_translation
{
112 GLuint RefCount
:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */
115 * Notes the value that is currently contained in each component
116 * (only used for PROGRAM_TEMPORARY registers).
118 struct reg_value
*Value
[4];
122 struct radeon_compiler
* Compiler
;
123 struct gl_program
*Program
;
124 const struct radeon_pair_handler
*Handler
;
130 * Translate Mesa registers to hardware registers
132 struct pair_register_translation Inputs
[FRAG_ATTRIB_MAX
];
133 struct pair_register_translation Temps
[MAX_PROGRAM_TEMPS
];
136 GLuint RefCount
; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
140 * Linked list of instructions that can be scheduled right now,
141 * based on which ALU/TEX resources they require.
143 struct pair_state_instruction
*ReadyFullALU
;
144 struct pair_state_instruction
*ReadyRGB
;
145 struct pair_state_instruction
*ReadyAlpha
;
146 struct pair_state_instruction
*ReadyTEX
;
150 static struct pair_register_translation
*get_register(struct pair_state
*s
, GLuint file
, GLuint index
)
153 case PROGRAM_TEMPORARY
: return &s
->Temps
[index
];
154 case PROGRAM_INPUT
: return &s
->Inputs
[index
];
159 static void alloc_hw_reg(struct pair_state
*s
, GLuint file
, GLuint index
, GLuint hwindex
)
161 struct pair_register_translation
*t
= get_register(s
, file
, index
);
162 ASSERT(!s
->HwTemps
[hwindex
].RefCount
);
163 ASSERT(!t
->Allocated
);
164 s
->HwTemps
[hwindex
].RefCount
= t
->RefCount
;
166 t
->HwIndex
= hwindex
;
169 static GLuint
get_hw_reg(struct pair_state
*s
, GLuint file
, GLuint index
)
173 struct pair_register_translation
*t
= get_register(s
, file
, index
);
175 error("get_hw_reg: %i[%i]\n", file
, index
);
182 for(hwindex
= 0; hwindex
< s
->Handler
->MaxHwTemps
; ++hwindex
)
183 if (!s
->HwTemps
[hwindex
].RefCount
)
186 if (hwindex
>= s
->Handler
->MaxHwTemps
) {
187 error("Ran out of hardware temporaries");
191 alloc_hw_reg(s
, file
, index
, hwindex
);
196 static void deref_hw_reg(struct pair_state
*s
, GLuint hwindex
)
198 if (!s
->HwTemps
[hwindex
].RefCount
) {
199 error("Hwindex %i refcount error", hwindex
);
203 s
->HwTemps
[hwindex
].RefCount
--;
206 static void add_pairinst_to_list(struct pair_state_instruction
**list
, struct pair_state_instruction
*pairinst
)
208 pairinst
->NextReady
= *list
;
/**
 * The given instruction has become ready. Link it into the ready list
 * that matches the resources it needs.
 */
216 static void instruction_ready(struct pair_state
*s
, struct pair_state_instruction
*pairinst
)
219 _mesa_printf("instruction_ready(%i)\n", pairinst
->IP
);
222 add_pairinst_to_list(&s
->ReadyTEX
, pairinst
);
223 else if (!pairinst
->NeedAlpha
)
224 add_pairinst_to_list(&s
->ReadyRGB
, pairinst
);
225 else if (!pairinst
->NeedRGB
)
226 add_pairinst_to_list(&s
->ReadyAlpha
, pairinst
);
228 add_pairinst_to_list(&s
->ReadyFullALU
, pairinst
);
/**
 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
 * and reverse the order of arguments for CMP.
 */
236 static void final_rewrite(struct pair_state
*s
, struct prog_instruction
*inst
)
238 struct prog_src_register tmp
;
240 switch(inst
->Opcode
) {
242 inst
->SrcReg
[2] = inst
->SrcReg
[1];
243 inst
->SrcReg
[1].File
= PROGRAM_BUILTIN
;
244 inst
->SrcReg
[1].Swizzle
= SWIZZLE_1111
;
245 inst
->SrcReg
[1].Negate
= NEGATE_NONE
;
246 inst
->Opcode
= OPCODE_MAD
;
249 tmp
= inst
->SrcReg
[2];
250 inst
->SrcReg
[2] = inst
->SrcReg
[0];
251 inst
->SrcReg
[0] = tmp
;
254 /* AMD say we should use CMP.
255 * However, when we transform
258 * CMP tmp, -r0, -r0, 0;
260 * we get incorrect behaviour on R500 when r0 == 0.0.
261 * It appears that the R500 KIL hardware treats -0.0 as less
264 inst
->SrcReg
[1].File
= PROGRAM_BUILTIN
;
265 inst
->SrcReg
[1].Swizzle
= SWIZZLE_1111
;
266 inst
->SrcReg
[2].File
= PROGRAM_BUILTIN
;
267 inst
->SrcReg
[2].Swizzle
= SWIZZLE_0000
;
268 inst
->Opcode
= OPCODE_MAD
;
271 inst
->SrcReg
[2].File
= PROGRAM_BUILTIN
;
272 inst
->SrcReg
[2].Swizzle
= SWIZZLE_0000
;
273 inst
->Opcode
= OPCODE_MAD
;
/**
 * Classify an instruction according to which ALUs etc. it needs.
 */
285 static void classify_instruction(struct pair_state
*s
,
286 struct pair_state_instruction
*psi
)
288 psi
->NeedRGB
= (psi
->Instruction
.DstReg
.WriteMask
& WRITEMASK_XYZ
) ? 1 : 0;
289 psi
->NeedAlpha
= (psi
->Instruction
.DstReg
.WriteMask
& WRITEMASK_W
) ? 1 : 0;
291 switch(psi
->Instruction
.Opcode
) {
309 psi
->IsTranscendent
= 1;
326 error("Unknown opcode %d\n", psi
->Instruction
.Opcode
);
/**
 * Count how often each (input, temporary) register is read and written,
 * and scan the instruction stream to find dependencies.
 */
336 static void scan_instructions(struct pair_state
*s
)
338 struct prog_instruction
*source
;
341 for(source
= s
->Program
->Instructions
, ip
= 0;
342 source
->Opcode
!= OPCODE_END
;
344 struct pair_state_instruction
*pairinst
= memory_pool_malloc(&s
->Compiler
->Pool
, sizeof(*pairinst
));
345 memset(pairinst
, 0, sizeof(struct pair_state_instruction
));
347 pairinst
->Instruction
= *source
;
349 final_rewrite(s
, &pairinst
->Instruction
);
350 classify_instruction(s
, pairinst
);
352 int nsrc
= _mesa_num_inst_src_regs(pairinst
->Instruction
.Opcode
);
354 for(j
= 0; j
< nsrc
; j
++) {
355 struct pair_register_translation
*t
=
356 get_register(s
, pairinst
->Instruction
.SrcReg
[j
].File
, pairinst
->Instruction
.SrcReg
[j
].Index
);
362 if (pairinst
->Instruction
.SrcReg
[j
].File
== PROGRAM_TEMPORARY
) {
364 for(i
= 0; i
< 4; ++i
) {
365 GLuint swz
= GET_SWZ(pairinst
->Instruction
.SrcReg
[j
].Swizzle
, i
);
367 continue; /* constant or NIL swizzle */
369 continue; /* this is an undefined read */
371 /* Do not add a dependency if this instruction
372 * also rewrites the value. The code below adds
373 * a dependency for the DstReg, which is a superset
374 * of the SrcReg dependency. */
375 if (pairinst
->Instruction
.DstReg
.File
== PROGRAM_TEMPORARY
&&
376 pairinst
->Instruction
.DstReg
.Index
== pairinst
->Instruction
.SrcReg
[j
].Index
&&
377 GET_BIT(pairinst
->Instruction
.DstReg
.WriteMask
, swz
))
380 struct reg_value_reader
* r
= memory_pool_malloc(&s
->Compiler
->Pool
, sizeof(*r
));
381 pairinst
->NumDependencies
++;
382 t
->Value
[swz
]->NumReaders
++;
383 r
->Reader
= pairinst
;
384 r
->Next
= t
->Value
[swz
]->Readers
;
385 t
->Value
[swz
]->Readers
= r
;
390 int ndst
= _mesa_num_inst_dst_regs(pairinst
->Instruction
.Opcode
);
392 struct pair_register_translation
*t
=
393 get_register(s
, pairinst
->Instruction
.DstReg
.File
, pairinst
->Instruction
.DstReg
.Index
);
397 if (pairinst
->Instruction
.DstReg
.File
== PROGRAM_TEMPORARY
) {
399 for(j
= 0; j
< 4; ++j
) {
400 if (!GET_BIT(pairinst
->Instruction
.DstReg
.WriteMask
, j
))
403 struct reg_value
* v
= memory_pool_malloc(&s
->Compiler
->Pool
, sizeof(*v
));
404 memset(v
, 0, sizeof(struct reg_value
));
405 v
->Writer
= pairinst
;
407 pairinst
->NumDependencies
++;
408 t
->Value
[j
]->Next
= v
;
411 pairinst
->Values
[j
] = v
;
418 _mesa_printf("scan(%i): NumDeps = %i\n", ip
, pairinst
->NumDependencies
);
420 if (!pairinst
->NumDependencies
)
421 instruction_ready(s
, pairinst
);
424 /* Clear the PROGRAM_TEMPORARY state */
426 for(i
= 0; i
< MAX_PROGRAM_TEMPS
; ++i
) {
427 for(j
= 0; j
< 4; ++j
)
428 s
->Temps
[i
].Value
[j
] = 0;
/**
 * Reserve hardware temporary registers for the program inputs.
 *
 * @note This allocation is performed explicitly, because the order of inputs
 * is determined by the RS hardware.
 */
439 static void allocate_input_registers(struct pair_state
*s
)
441 GLuint InputsRead
= s
->Program
->InputsRead
;
446 if (InputsRead
& FRAG_BIT_COL0
)
447 alloc_hw_reg(s
, PROGRAM_INPUT
, FRAG_ATTRIB_COL0
, hwindex
++);
448 InputsRead
&= ~FRAG_BIT_COL0
;
450 /* Secondary color */
451 if (InputsRead
& FRAG_BIT_COL1
)
452 alloc_hw_reg(s
, PROGRAM_INPUT
, FRAG_ATTRIB_COL1
, hwindex
++);
453 InputsRead
&= ~FRAG_BIT_COL1
;
456 for (i
= 0; i
< 8; i
++) {
457 if (InputsRead
& (FRAG_BIT_TEX0
<< i
))
458 alloc_hw_reg(s
, PROGRAM_INPUT
, FRAG_ATTRIB_TEX0
+i
, hwindex
++);
460 InputsRead
&= ~FRAG_BITS_TEX_ANY
;
462 /* Fogcoords treated as a texcoord */
463 if (InputsRead
& FRAG_BIT_FOGC
)
464 alloc_hw_reg(s
, PROGRAM_INPUT
, FRAG_ATTRIB_FOGC
, hwindex
++);
465 InputsRead
&= ~FRAG_BIT_FOGC
;
467 /* fragment position treated as a texcoord */
468 if (InputsRead
& FRAG_BIT_WPOS
)
469 alloc_hw_reg(s
, PROGRAM_INPUT
, FRAG_ATTRIB_WPOS
, hwindex
++);
470 InputsRead
&= ~FRAG_BIT_WPOS
;
474 error("Don't know how to handle inputs 0x%x\n", InputsRead
);
478 static void decrement_dependencies(struct pair_state
*s
, struct pair_state_instruction
*pairinst
)
480 ASSERT(pairinst
->NumDependencies
> 0);
481 if (!--pairinst
->NumDependencies
)
482 instruction_ready(s
, pairinst
);
/**
 * Update the dependency tracking state based on what the instruction
 * at the given IP does.
 */
489 static void commit_instruction(struct pair_state
*s
, struct pair_state_instruction
*pairinst
)
491 struct prog_instruction
*inst
= &pairinst
->Instruction
;
494 _mesa_printf("commit_instruction(%i)\n", pairinst
->IP
);
496 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
497 struct pair_register_translation
*t
= &s
->Temps
[inst
->DstReg
.Index
];
498 deref_hw_reg(s
, t
->HwIndex
);
501 for(i
= 0; i
< 4; ++i
) {
502 if (!GET_BIT(inst
->DstReg
.WriteMask
, i
))
505 t
->Value
[i
] = pairinst
->Values
[i
];
506 if (t
->Value
[i
]->NumReaders
) {
507 struct reg_value_reader
*r
;
508 for(r
= pairinst
->Values
[i
]->Readers
; r
; r
= r
->Next
)
509 decrement_dependencies(s
, r
->Reader
);
510 } else if (t
->Value
[i
]->Next
) {
511 /* This happens when the only reader writes
512 * the register at the same time */
513 decrement_dependencies(s
, t
->Value
[i
]->Next
->Writer
);
518 int nsrc
= _mesa_num_inst_src_regs(inst
->Opcode
);
520 for(i
= 0; i
< nsrc
; i
++) {
521 struct pair_register_translation
*t
= get_register(s
, inst
->SrcReg
[i
].File
, inst
->SrcReg
[i
].Index
);
525 deref_hw_reg(s
, get_hw_reg(s
, inst
->SrcReg
[i
].File
, inst
->SrcReg
[i
].Index
));
527 if (inst
->SrcReg
[i
].File
!= PROGRAM_TEMPORARY
)
531 for(j
= 0; j
< 4; ++j
) {
532 GLuint swz
= GET_SWZ(inst
->SrcReg
[i
].Swizzle
, j
);
538 /* Do not free a dependency if this instruction
539 * also rewrites the value. See scan_instructions. */
540 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
&&
541 inst
->DstReg
.Index
== inst
->SrcReg
[i
].Index
&&
542 GET_BIT(inst
->DstReg
.WriteMask
, swz
))
545 if (!--t
->Value
[swz
]->NumReaders
) {
546 if (t
->Value
[swz
]->Next
)
547 decrement_dependencies(s
, t
->Value
[swz
]->Next
->Writer
);
/**
 * Emit all ready texture instructions in a single block.
 *
 * Emit as a single block to (hopefully) sample many textures in parallel,
 * and to avoid hardware indirections on R300.
 *
 * In R500, we don't really know when the result of a texture instruction
 * arrives. So allocate all destinations first, to make sure they do not
 * arrive early and overwrite a texture coordinate we're going to use later.
 */
565 static void emit_all_tex(struct pair_state
*s
)
567 struct pair_state_instruction
*readytex
;
568 struct pair_state_instruction
*pairinst
;
572 // Don't let the ready list change under us!
573 readytex
= s
->ReadyTEX
;
576 // Allocate destination hardware registers in one block to avoid conflicts.
577 for(pairinst
= readytex
; pairinst
; pairinst
= pairinst
->NextReady
) {
578 struct prog_instruction
*inst
= &pairinst
->Instruction
;
579 if (inst
->Opcode
!= OPCODE_KIL
)
580 get_hw_reg(s
, inst
->DstReg
.File
, inst
->DstReg
.Index
);
583 if (s
->Compiler
->Debug
)
584 _mesa_printf(" BEGIN_TEX\n");
586 if (s
->Handler
->BeginTexBlock
)
587 s
->Error
= s
->Error
|| !s
->Handler
->BeginTexBlock(s
->UserData
);
589 for(pairinst
= readytex
; pairinst
; pairinst
= pairinst
->NextReady
) {
590 struct prog_instruction
*inst
= &pairinst
->Instruction
;
591 commit_instruction(s
, pairinst
);
593 if (inst
->Opcode
!= OPCODE_KIL
)
594 inst
->DstReg
.Index
= get_hw_reg(s
, inst
->DstReg
.File
, inst
->DstReg
.Index
);
595 inst
->SrcReg
[0].Index
= get_hw_reg(s
, inst
->SrcReg
[0].File
, inst
->SrcReg
[0].Index
);
597 if (s
->Compiler
->Debug
) {
599 _mesa_print_instruction(inst
);
603 struct radeon_pair_texture_instruction rpti
;
605 switch(inst
->Opcode
) {
606 case OPCODE_TEX
: rpti
.Opcode
= RADEON_OPCODE_TEX
; break;
607 case OPCODE_TXB
: rpti
.Opcode
= RADEON_OPCODE_TXB
; break;
608 case OPCODE_TXP
: rpti
.Opcode
= RADEON_OPCODE_TXP
; break;
610 case OPCODE_KIL
: rpti
.Opcode
= RADEON_OPCODE_KIL
; break;
613 rpti
.DestIndex
= inst
->DstReg
.Index
;
614 rpti
.WriteMask
= inst
->DstReg
.WriteMask
;
615 rpti
.TexSrcUnit
= inst
->TexSrcUnit
;
616 rpti
.TexSrcTarget
= inst
->TexSrcTarget
;
617 rpti
.SrcIndex
= inst
->SrcReg
[0].Index
;
618 rpti
.SrcSwizzle
= inst
->SrcReg
[0].Swizzle
;
620 s
->Error
= s
->Error
|| !s
->Handler
->EmitTex(s
->UserData
, &rpti
);
623 if (s
->Compiler
->Debug
)
624 _mesa_printf(" END_TEX\n");
628 static int alloc_pair_source(struct pair_state
*s
, struct radeon_pair_instruction
*pair
,
629 struct prog_src_register src
, GLboolean rgb
, GLboolean alpha
)
632 int candidate_quality
= -1;
641 if (src
.File
== PROGRAM_TEMPORARY
|| src
.File
== PROGRAM_INPUT
) {
643 index
= get_hw_reg(s
, src
.File
, src
.Index
);
646 s
->Error
|= !s
->Handler
->EmitConst(s
->UserData
, src
.File
, src
.Index
, &index
);
649 for(i
= 0; i
< 3; ++i
) {
652 if (pair
->RGB
.Src
[i
].Used
) {
653 if (pair
->RGB
.Src
[i
].Constant
!= constant
||
654 pair
->RGB
.Src
[i
].Index
!= index
)
660 if (pair
->Alpha
.Src
[i
].Used
) {
661 if (pair
->Alpha
.Src
[i
].Constant
!= constant
||
662 pair
->Alpha
.Src
[i
].Index
!= index
)
667 if (q
> candidate_quality
) {
668 candidate_quality
= q
;
673 if (candidate
>= 0) {
675 pair
->RGB
.Src
[candidate
].Used
= 1;
676 pair
->RGB
.Src
[candidate
].Constant
= constant
;
677 pair
->RGB
.Src
[candidate
].Index
= index
;
680 pair
->Alpha
.Src
[candidate
].Used
= 1;
681 pair
->Alpha
.Src
[candidate
].Constant
= constant
;
682 pair
->Alpha
.Src
[candidate
].Index
= index
;
690 * Fill the given ALU instruction's opcodes and source operands into the given pair,
693 static GLboolean
fill_instruction_into_pair(
694 struct pair_state
*s
,
695 struct radeon_pair_instruction
*pair
,
696 struct pair_state_instruction
*pairinst
)
698 struct prog_instruction
*inst
= &pairinst
->Instruction
;
700 ASSERT(!pairinst
->NeedRGB
|| pair
->RGB
.Opcode
== OPCODE_NOP
);
701 ASSERT(!pairinst
->NeedAlpha
|| pair
->Alpha
.Opcode
== OPCODE_NOP
);
703 if (pairinst
->NeedRGB
) {
704 if (pairinst
->IsTranscendent
)
705 pair
->RGB
.Opcode
= OPCODE_REPL_ALPHA
;
707 pair
->RGB
.Opcode
= inst
->Opcode
;
708 if (inst
->SaturateMode
== SATURATE_ZERO_ONE
)
709 pair
->RGB
.Saturate
= 1;
711 if (pairinst
->NeedAlpha
) {
712 pair
->Alpha
.Opcode
= inst
->Opcode
;
713 if (inst
->SaturateMode
== SATURATE_ZERO_ONE
)
714 pair
->Alpha
.Saturate
= 1;
717 int nargs
= _mesa_num_inst_src_regs(inst
->Opcode
);
720 /* Special case for DDX/DDY (MDH/MDV). */
721 if (inst
->Opcode
== OPCODE_DDX
|| inst
->Opcode
== OPCODE_DDY
) {
722 if (pair
->RGB
.Src
[0].Used
|| pair
->Alpha
.Src
[0].Used
)
728 for(i
= 0; i
< nargs
; ++i
) {
730 if (pairinst
->NeedRGB
&& !pairinst
->IsTranscendent
) {
731 GLboolean srcrgb
= GL_FALSE
;
732 GLboolean srcalpha
= GL_FALSE
;
734 for(j
= 0; j
< 3; ++j
) {
735 GLuint swz
= GET_SWZ(inst
->SrcReg
[i
].Swizzle
, j
);
741 source
= alloc_pair_source(s
, pair
, inst
->SrcReg
[i
], srcrgb
, srcalpha
);
744 pair
->RGB
.Arg
[i
].Source
= source
;
745 pair
->RGB
.Arg
[i
].Swizzle
= inst
->SrcReg
[i
].Swizzle
& 0x1ff;
746 pair
->RGB
.Arg
[i
].Abs
= inst
->SrcReg
[i
].Abs
;
747 pair
->RGB
.Arg
[i
].Negate
= !!(inst
->SrcReg
[i
].Negate
& (NEGATE_X
| NEGATE_Y
| NEGATE_Z
));
749 if (pairinst
->NeedAlpha
) {
750 GLboolean srcrgb
= GL_FALSE
;
751 GLboolean srcalpha
= GL_FALSE
;
752 GLuint swz
= GET_SWZ(inst
->SrcReg
[i
].Swizzle
, pairinst
->IsTranscendent
? 0 : 3);
757 source
= alloc_pair_source(s
, pair
, inst
->SrcReg
[i
], srcrgb
, srcalpha
);
760 pair
->Alpha
.Arg
[i
].Source
= source
;
761 pair
->Alpha
.Arg
[i
].Swizzle
= swz
;
762 pair
->Alpha
.Arg
[i
].Abs
= inst
->SrcReg
[i
].Abs
;
763 pair
->Alpha
.Arg
[i
].Negate
= !!(inst
->SrcReg
[i
].Negate
& NEGATE_W
);
/**
 * Fill in the destination register information.
 *
 * This is split from filling in source registers because we want
 * to avoid allocating hardware temporaries for destinations until
 * we are absolutely certain that we're going to emit a certain
 * instruction pairing.
 */
779 static void fill_dest_into_pair(
780 struct pair_state
*s
,
781 struct radeon_pair_instruction
*pair
,
782 struct pair_state_instruction
*pairinst
)
784 struct prog_instruction
*inst
= &pairinst
->Instruction
;
786 if (inst
->DstReg
.File
== PROGRAM_OUTPUT
) {
787 if (inst
->DstReg
.Index
== FRAG_RESULT_COLOR
) {
788 pair
->RGB
.OutputWriteMask
|= inst
->DstReg
.WriteMask
& WRITEMASK_XYZ
;
789 pair
->Alpha
.OutputWriteMask
|= GET_BIT(inst
->DstReg
.WriteMask
, 3);
790 } else if (inst
->DstReg
.Index
== FRAG_RESULT_DEPTH
) {
791 pair
->Alpha
.DepthWriteMask
|= GET_BIT(inst
->DstReg
.WriteMask
, 3);
794 GLuint hwindex
= get_hw_reg(s
, inst
->DstReg
.File
, inst
->DstReg
.Index
);
795 if (pairinst
->NeedRGB
) {
796 pair
->RGB
.DestIndex
= hwindex
;
797 pair
->RGB
.WriteMask
|= inst
->DstReg
.WriteMask
& WRITEMASK_XYZ
;
799 if (pairinst
->NeedAlpha
) {
800 pair
->Alpha
.DestIndex
= hwindex
;
801 pair
->Alpha
.WriteMask
|= GET_BIT(inst
->DstReg
.WriteMask
, 3);
/**
 * Find a good ALU instruction or pair of ALU instructions and emit it.
 *
 * Prefer emitting full ALU instructions, so that when we reach a point
 * where no full ALU instruction can be emitted, we have more candidates
 * for RGB/Alpha pairing.
 */
814 static void emit_alu(struct pair_state
*s
)
816 struct radeon_pair_instruction pair
;
817 struct pair_state_instruction
*psi
;
819 if (s
->ReadyFullALU
|| !(s
->ReadyRGB
&& s
->ReadyAlpha
)) {
820 if (s
->ReadyFullALU
) {
821 psi
= s
->ReadyFullALU
;
822 s
->ReadyFullALU
= s
->ReadyFullALU
->NextReady
;
823 } else if (s
->ReadyRGB
) {
825 s
->ReadyRGB
= s
->ReadyRGB
->NextReady
;
828 s
->ReadyAlpha
= s
->ReadyAlpha
->NextReady
;
831 _mesa_bzero(&pair
, sizeof(pair
));
832 fill_instruction_into_pair(s
, &pair
, psi
);
833 fill_dest_into_pair(s
, &pair
, psi
);
834 commit_instruction(s
, psi
);
836 struct pair_state_instruction
**prgb
;
837 struct pair_state_instruction
**palpha
;
839 /* Some pairings might fail because they require too
840 * many source slots; try all possible pairings if necessary */
841 for(prgb
= &s
->ReadyRGB
; *prgb
; prgb
= &(*prgb
)->NextReady
) {
842 for(palpha
= &s
->ReadyAlpha
; *palpha
; palpha
= &(*palpha
)->NextReady
) {
843 struct pair_state_instruction
* psirgb
= *prgb
;
844 struct pair_state_instruction
* psialpha
= *palpha
;
845 _mesa_bzero(&pair
, sizeof(pair
));
846 fill_instruction_into_pair(s
, &pair
, psirgb
);
847 if (!fill_instruction_into_pair(s
, &pair
, psialpha
))
849 *prgb
= (*prgb
)->NextReady
;
850 *palpha
= (*palpha
)->NextReady
;
851 fill_dest_into_pair(s
, &pair
, psirgb
);
852 fill_dest_into_pair(s
, &pair
, psialpha
);
853 commit_instruction(s
, psirgb
);
854 commit_instruction(s
, psialpha
);
859 /* No success in pairing; just take the first RGB instruction */
861 s
->ReadyRGB
= s
->ReadyRGB
->NextReady
;
863 _mesa_bzero(&pair
, sizeof(pair
));
864 fill_instruction_into_pair(s
, &pair
, psi
);
865 fill_dest_into_pair(s
, &pair
, psi
);
866 commit_instruction(s
, psi
);
870 if (s
->Compiler
->Debug
)
871 radeonPrintPairInstruction(&pair
);
873 s
->Error
= s
->Error
|| !s
->Handler
->EmitPaired(s
->UserData
, &pair
);
877 GLboolean
radeonPairProgram(
878 struct radeon_compiler
* compiler
,
879 struct gl_program
*program
,
880 const struct radeon_pair_handler
* handler
, void *userdata
)
884 _mesa_bzero(&s
, sizeof(s
));
885 s
.Compiler
= compiler
;
888 s
.UserData
= userdata
;
889 s
.Verbose
= GL_FALSE
&& s
.Compiler
->Debug
;
891 if (s
.Compiler
->Debug
)
892 _mesa_printf("Emit paired program\n");
894 scan_instructions(&s
);
895 allocate_input_registers(&s
);
898 (s
.ReadyTEX
|| s
.ReadyRGB
|| s
.ReadyAlpha
|| s
.ReadyFullALU
)) {
902 while(s
.ReadyFullALU
|| s
.ReadyRGB
|| s
.ReadyAlpha
)
906 if (s
.Compiler
->Debug
)
907 _mesa_printf(" END\n");
913 static void print_pair_src(int i
, struct radeon_pair_instruction_source
* src
)
915 _mesa_printf(" Src%i = %s[%i]", i
, src
->Constant
? "CNST" : "TEMP", src
->Index
);
918 static const char* opcode_string(GLuint opcode
)
920 if (opcode
== OPCODE_REPL_ALPHA
)
923 return _mesa_opcode_string(opcode
);
926 static int num_pairinst_args(GLuint opcode
)
928 if (opcode
== OPCODE_REPL_ALPHA
)
931 return _mesa_num_inst_src_regs(opcode
);
934 static char swizzle_char(GLuint swz
)
937 case SWIZZLE_X
: return 'x';
938 case SWIZZLE_Y
: return 'y';
939 case SWIZZLE_Z
: return 'z';
940 case SWIZZLE_W
: return 'w';
941 case SWIZZLE_ZERO
: return '0';
942 case SWIZZLE_ONE
: return '1';
943 case SWIZZLE_NIL
: return '_';
948 void radeonPrintPairInstruction(struct radeon_pair_instruction
*inst
)
953 _mesa_printf(" RGB: ");
954 for(i
= 0; i
< 3; ++i
) {
955 if (inst
->RGB
.Src
[i
].Used
)
956 print_pair_src(i
, inst
->RGB
.Src
+ i
);
959 _mesa_printf(" Alpha:");
960 for(i
= 0; i
< 3; ++i
) {
961 if (inst
->Alpha
.Src
[i
].Used
)
962 print_pair_src(i
, inst
->Alpha
.Src
+ i
);
966 _mesa_printf(" %s%s", opcode_string(inst
->RGB
.Opcode
), inst
->RGB
.Saturate
? "_SAT" : "");
967 if (inst
->RGB
.WriteMask
)
968 _mesa_printf(" TEMP[%i].%s%s%s", inst
->RGB
.DestIndex
,
969 (inst
->RGB
.WriteMask
& 1) ? "x" : "",
970 (inst
->RGB
.WriteMask
& 2) ? "y" : "",
971 (inst
->RGB
.WriteMask
& 4) ? "z" : "");
972 if (inst
->RGB
.OutputWriteMask
)
973 _mesa_printf(" COLOR.%s%s%s",
974 (inst
->RGB
.OutputWriteMask
& 1) ? "x" : "",
975 (inst
->RGB
.OutputWriteMask
& 2) ? "y" : "",
976 (inst
->RGB
.OutputWriteMask
& 4) ? "z" : "");
977 nargs
= num_pairinst_args(inst
->RGB
.Opcode
);
978 for(i
= 0; i
< nargs
; ++i
) {
979 const char* abs
= inst
->RGB
.Arg
[i
].Abs
? "|" : "";
980 const char* neg
= inst
->RGB
.Arg
[i
].Negate
? "-" : "";
981 _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg
, abs
, inst
->RGB
.Arg
[i
].Source
,
982 swizzle_char(GET_SWZ(inst
->RGB
.Arg
[i
].Swizzle
, 0)),
983 swizzle_char(GET_SWZ(inst
->RGB
.Arg
[i
].Swizzle
, 1)),
984 swizzle_char(GET_SWZ(inst
->RGB
.Arg
[i
].Swizzle
, 2)),
989 _mesa_printf(" %s%s", opcode_string(inst
->Alpha
.Opcode
), inst
->Alpha
.Saturate
? "_SAT" : "");
990 if (inst
->Alpha
.WriteMask
)
991 _mesa_printf(" TEMP[%i].w", inst
->Alpha
.DestIndex
);
992 if (inst
->Alpha
.OutputWriteMask
)
993 _mesa_printf(" COLOR.w");
994 if (inst
->Alpha
.DepthWriteMask
)
995 _mesa_printf(" DEPTH.w");
996 nargs
= num_pairinst_args(inst
->Alpha
.Opcode
);
997 for(i
= 0; i
< nargs
; ++i
) {
998 const char* abs
= inst
->Alpha
.Arg
[i
].Abs
? "|" : "";
999 const char* neg
= inst
->Alpha
.Arg
[i
].Negate
? "-" : "";
1000 _mesa_printf(", %s%sSrc%i.%c%s", neg
, abs
, inst
->Alpha
.Arg
[i
].Source
,
1001 swizzle_char(inst
->Alpha
.Arg
[i
].Swizzle
), abs
);