2 * Copyright (C) 2009 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "radeon_program_pair.h"
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
/**
 * Debug trace helper: forwards its arguments to fprintf(stderr, ...) only
 * when VERBOSE evaluates to non-zero. The do/while(0) wrapper makes the
 * macro usable as a single statement.
 * NOTE(review): the definition of VERBOSE is not visible in this listing.
 */
39 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
/**
 * Scheduler bookkeeping for a single rc_instruction: the wrapped
 * instruction, its link in a ready list, the register values it writes
 * (up to 4) and reads (up to 12), the number of still unresolved
 * dependencies, and the reader data filled in by rc_get_readers().
 *
 * The three counters are bit-fields (3, 4 and 5 bits wide). scan_read() and
 * scan_write() report an error through rc_error() instead of storing more
 * than 12 read values / 4 write values.
 */
41 struct schedule_instruction
{
42 struct rc_instruction
* Instruction
;
44 /** Next instruction in the linked list of ready instructions. */
45 struct schedule_instruction
*NextReady
;
47 /** Values that this instruction reads and writes */
48 struct reg_value
* WriteValues
[4];
49 struct reg_value
* ReadValues
[12];
50 unsigned int NumWriteValues
:3;
51 unsigned int NumReadValues
:4;
54 * Number of (read and write) dependencies that must be resolved before
55 * this instruction can be scheduled.
57 unsigned int NumDependencies
:5;
59 /** List of all readers (see rc_get_readers() for the definition of
60 * "all readers"), even those outside the basic block this instruction
62 struct rc_reader_data GlobalReaders
;
67 * Used to keep track of which instructions read a value.
/**
 * Node of the singly linked list that hangs off reg_value::Readers.
 * Reader is an instruction that reads the value; Next is the following
 * node (scan_read() pushes new nodes at the head of the list).
 */
69 struct reg_value_reader
{
70 struct schedule_instruction
*Reader
;
71 struct reg_value_reader
*Next
;
75 * Used to keep track which values are stored in each component of a
79 struct schedule_instruction
* Writer
;
82 * Unordered linked list of instructions that read from this value.
83 * When this value becomes available, we increase all readers'
86 struct reg_value_reader
*Readers
;
89 * Number of readers of this value. This is decremented each time
90 * a reader of the value is committed.
91 * When the reader count reaches zero, the dependency count
92 * of the instruction writing \ref Next is decremented.
94 unsigned int NumReaders
;
96 struct reg_value
*Next
; /**< Pointer to the next value to be written to the same register */
/**
 * Tracking state for one register: four reg_value pointers, one per
 * channel. get_reg_valuep() indexes this array with its `chan` argument.
 */
99 struct register_state
{
100 struct reg_value
* Values
[4];
104 struct rc_instruciont
* Inst
;
105 unsigned int OldIndex
:(RC_REGISTER_INDEX_BITS
+1);
106 unsigned int OldSwizzle
:3;
107 unsigned int NewIndex
:(RC_REGISTER_INDEX_BITS
+1);
108 unsigned int NewSwizzle
:3;
109 unsigned int OnlyTexReads
:1;
110 struct remap_reg
* Next
;
/**
 * Working state of the scheduler: the compiler handle (C), the instruction
 * currently being scanned (Current), per-channel value tracking for every
 * temporary register index, and the four ready lists that
 * instruction_ready() fills (full ALU, RGB-only, alpha-only and TEX).
 */
113 struct schedule_state
{
114 struct radeon_compiler
* C
;
115 struct schedule_instruction
* Current
;
117 struct register_state Temporary
[RC_REGISTER_MAX_INDEX
];
120 * Linked lists of instructions that can be scheduled right now,
121 * based on which ALU/TEX resources they require.
124 struct schedule_instruction
*ReadyFullALU
;
125 struct schedule_instruction
*ReadyRGB
;
126 struct schedule_instruction
*ReadyAlpha
;
127 struct schedule_instruction
*ReadyTEX
;
/**
 * Look up the tracking slot for one register channel.
 *
 * Only RC_FILE_TEMPORARY registers are tracked; any other file takes the
 * first guard. An index at or beyond RC_REGISTER_MAX_INDEX is reported via
 * rc_error(). Otherwise the address of s->Temporary[index].Values[chan] is
 * returned, so callers can both read and replace the current value.
 *
 * NOTE(review): the statements executed by the two guards (presumably
 * returning a null pointer) are not visible in this listing - confirm.
 * `chan` is not range-checked in the visible code.
 */
131 static struct reg_value
** get_reg_valuep(struct schedule_state
* s
,
132 rc_register_file file
, unsigned int index
, unsigned int chan
)
134 if (file
!= RC_FILE_TEMPORARY
)
137 if (index
>= RC_REGISTER_MAX_INDEX
) {
138 rc_error(s
->C
, "%s: index %i out of bounds\n", __FUNCTION__
, index
);
142 return &s
->Temporary
[index
].Values
[chan
];
/**
 * Front-insertion helper for a ready list: the new instruction's NextReady
 * is pointed at the current head (*list).
 * NOTE(review): the statement that stores inst into *list is not visible
 * in this listing - confirm.
 */
145 static void add_inst_to_list(struct schedule_instruction
** list
, struct schedule_instruction
* inst
)
147 inst
->NextReady
= *list
;
/**
 * Tail-insertion helper for a ready list: walks NextReady starting at *list
 * until the last element and links inst behind it.
 *
 * The walk dereferences *list, so an empty list has to be dealt with before
 * it starts.
 * NOTE(review): that empty-list handling is not visible in this listing -
 * confirm.
 */
151 static void add_inst_to_list_end(struct schedule_instruction
** list
,
152 struct schedule_instruction
* inst
)
157 struct schedule_instruction
* temp
= *list
;
158 while(temp
->NextReady
){
159 temp
= temp
->NextReady
;
161 temp
->NextReady
= inst
;
/**
 * Put an instruction whose dependencies are resolved on a ready list.
 *
 * - Type == RC_INSTRUCTION_NORMAL: appended to the end of ReadyTEX.
 * - Alpha opcode is RC_OPCODE_NOP:  pushed on ReadyRGB.
 * - RGB opcode is RC_OPCODE_NOP:    pushed on ReadyAlpha.
 * - The last call pushes on ReadyFullALU.
 *
 * NOTE(review): the `else` introducing the ReadyFullALU call is not visible
 * in this listing; it is presumably the fall-through branch - confirm.
 */
165 static void instruction_ready(struct schedule_state
* s
, struct schedule_instruction
* sinst
)
167 DBG("%i is now ready\n", sinst
->Instruction
->IP
);
169 /* Adding Ready TEX instructions to the end of the "Ready List" helps
170 * us emit TEX instructions in blocks without losing our place. */
171 if (sinst
->Instruction
->Type
== RC_INSTRUCTION_NORMAL
)
172 add_inst_to_list_end(&s
->ReadyTEX
, sinst
);
173 else if (sinst
->Instruction
->U
.P
.Alpha
.Opcode
== RC_OPCODE_NOP
)
174 add_inst_to_list(&s
->ReadyRGB
, sinst
);
175 else if (sinst
->Instruction
->U
.P
.RGB
.Opcode
== RC_OPCODE_NOP
)
176 add_inst_to_list(&s
->ReadyAlpha
, sinst
);
178 add_inst_to_list(&s
->ReadyFullALU
, sinst
);
/**
 * Resolve one dependency of sinst. Asserts that at least one dependency is
 * outstanding, decrements the counter, and hands the instruction to
 * instruction_ready() once the counter reaches zero.
 */
181 static void decrease_dependencies(struct schedule_state
* s
, struct schedule_instruction
* sinst
)
183 assert(sinst
->NumDependencies
> 0);
184 sinst
->NumDependencies
--;
185 if (!sinst
->NumDependencies
)
186 instruction_ready(s
, sinst
);
190 * This function decreases the dependencies of the next instruction that
191 * wants to write to each of sinst's read values.
/**
 * Read-side bookkeeping when sinst is committed: walks sinst->ReadValues,
 * asserts each value still has readers, and when a value's NumReaders is
 * zero releases one dependency of the instruction that writes v->Next.
 *
 * NOTE(review): the NumReaders decrement, the declaration of `i`, and any
 * check that v->Next exists are not visible in this listing - confirm.
 */
193 static void commit_update_reads(struct schedule_state
* s
,
194 struct schedule_instruction
* sinst
){
196 for(i
= 0; i
< sinst
->NumReadValues
; ++i
) {
197 struct reg_value
* v
= sinst
->ReadValues
[i
];
198 assert(v
->NumReaders
> 0);
200 if (!v
->NumReaders
) {
202 decrease_dependencies(s
, v
->Next
->Writer
);
/**
 * Write-side bookkeeping when sinst is committed: for every value in
 * sinst->WriteValues, each instruction on the value's Readers list gets one
 * dependency released through decrease_dependencies(). The writer of
 * v->Next is released as well in the situation the in-body comment refers
 * to.
 *
 * NOTE(review): the declaration of `i` and the condition guarding the
 * v->Next->Writer call are not visible in this listing - confirm.
 */
207 static void commit_update_writes(struct schedule_state
* s
,
208 struct schedule_instruction
* sinst
){
210 for(i
= 0; i
< sinst
->NumWriteValues
; ++i
) {
211 struct reg_value
* v
= sinst
->WriteValues
[i
];
213 for(struct reg_value_reader
* r
= v
->Readers
; r
; r
= r
->Next
) {
214 decrease_dependencies(s
, r
->Reader
);
217 /* This happens in instruction sequences of the type
220 * See also the subtlety in how instructions that both
221 * read and write the same register are scanned.
224 decrease_dependencies(s
, v
->Next
->Writer
);
/**
 * Commit an ALU instruction: trace it, then run the read-side update
 * followed by the write-side update so dependent instructions can become
 * ready.
 */
229 static void commit_alu_instruction(struct schedule_state
* s
, struct schedule_instruction
* sinst
)
231 DBG("%i: commit\n", sinst
->Instruction
->IP
);
233 commit_update_reads(s
, sinst
);
235 commit_update_writes(s
, sinst
);
239 * Emit all ready texture instructions in a single block.
241 * Emit as a single block to (hopefully) sample many textures in parallel,
242 * and to avoid hardware indirections on R300.
/**
 * Emit the instructions on s->ReadyTEX directly in front of `before`.
 *
 * A new instruction with opcode RC_OPCODE_BEGIN_TEX is inserted first.
 * Each ready TEX instruction is then linked back in and its reads are
 * committed; afterwards the list is walked again from s->ReadyTEX and the
 * writes are committed. The in-body comment explains why reads of the whole
 * block are committed before any write.
 *
 * NOTE(review): the loop headers and the place where s->ReadyTEX is cleared
 * are not visible in this listing - confirm.
 */
244 static void emit_all_tex(struct schedule_state
* s
, struct rc_instruction
* before
)
246 struct schedule_instruction
*readytex
;
247 struct rc_instruction
* inst_begin
;
251 /* Node marker for R300 */
252 inst_begin
= rc_insert_new_instruction(s
->C
, before
->Prev
);
253 inst_begin
->U
.I
.Opcode
= RC_OPCODE_BEGIN_TEX
;
255 /* Link texture instructions back in */
256 readytex
= s
->ReadyTEX
;
258 rc_insert_instruction(before
->Prev
, readytex
->Instruction
);
259 DBG("%i: commit TEX reads\n", readytex
->Instruction
->IP
);
261 /* All of the TEX instructions in the same TEX block have
262 * their source registers read from before any of the
263 * instructions in that block write to their destination
264 * registers. This means that when we commit a TEX
265 * instruction, any other TEX instruction that wants to write
266 * to one of the committed instruction's source register can be
267 * marked as ready and should be emitted in the same TEX
268 * block. This prevents the following sequence from being
269 * emitted in two different TEX blocks:
270 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
271 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
273 commit_update_reads(s
, readytex
);
274 readytex
= readytex
->NextReady
;
276 readytex
= s
->ReadyTEX
;
279 DBG("%i: commit TEX writes\n", readytex
->Instruction
->IP
);
280 commit_update_writes(s
, readytex
);
281 readytex
= readytex
->NextReady
;
285 /* This is a helper function for destructive_merge_instructions(). It helps
286 * merge presubtract sources from two instructions and makes sure the
287 * presubtract sources end up in the correct spot. This function assumes that
288 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
289 * but no scalar instruction (alpha).
290 * @return 0 if merging the presubtract sources fails.
291 * @return 1 if merging the presubtract sources succeeds.
/**
 * Merge the presubtract sources of `src` into dst_full.
 *
 * Asserts that dst_full has no alpha opcode, picks dst_full->RGB or
 * dst_full->Alpha as the destination sub-instruction, and then, for each of
 * the rc_presubtract_src_reg_count() registers the presubtract operation
 * needs, allocates a source slot in dst_full with rc_pair_alloc_source()
 * and swaps sources so that presubtract register number srcp_src sits in
 * slot srcp_src. Arguments of dst_full->RGB that referred to a swapped slot
 * are redirected to follow the swap.
 *
 * NOTE(review): the third parameter, the switch header selecting dst_sub,
 * the declarations of free_source/arg and all return statements are not
 * visible in this listing - confirm against the full source.
 */
293 static int merge_presub_sources(
294 struct rc_pair_instruction
* dst_full
,
295 struct rc_pair_sub_instruction src
,
298 unsigned int srcp_src
, srcp_regs
, is_rgb
, is_alpha
;
299 struct rc_pair_sub_instruction
* dst_sub
;
300 const struct rc_opcode_info
* info
;
302 assert(dst_full
->Alpha
.Opcode
== RC_OPCODE_NOP
);
308 dst_sub
= &dst_full
->RGB
;
310 case RC_SOURCE_ALPHA
:
313 dst_sub
= &dst_full
->Alpha
;
320 info
= rc_get_opcode_info(dst_full
->RGB
.Opcode
);
322 if (dst_sub
->Src
[RC_PAIR_PRESUB_SRC
].Used
)
325 srcp_regs
= rc_presubtract_src_reg_count(
326 src
.Src
[RC_PAIR_PRESUB_SRC
].Index
);
327 for(srcp_src
= 0; srcp_src
< srcp_regs
; srcp_src
++) {
330 unsigned int one_way
= 0;
331 struct rc_pair_instruction_source srcp
= src
.Src
[srcp_src
];
332 struct rc_pair_instruction_source temp
;
334 free_source
= rc_pair_alloc_source(dst_full
, is_rgb
, is_alpha
,
335 srcp
.File
, srcp
.Index
);
337 /* If free_source < 0 then there are no free source
342 temp
= dst_sub
->Src
[srcp_src
];
343 dst_sub
->Src
[srcp_src
] = dst_sub
->Src
[free_source
];
345 /* srcp needs src0 and src1 to be the same */
346 if (free_source
< srcp_src
) {
349 free_source
= rc_pair_alloc_source(dst_full
, is_rgb
,
350 is_alpha
, temp
.File
, temp
.Index
);
355 dst_sub
->Src
[free_source
] = temp
;
358 /* If free_source == srcp_src, then the presubtract
359 * source is already in the correct place. */
360 if (free_source
== srcp_src
)
363 /* Shuffle the sources, so we can put the
364 * presubtract source in the correct place. */
365 for(arg
= 0; arg
< info
->NumSrcRegs
; arg
++) {
366 /*If this arg does not read from an rgb source,
368 if (!(rc_source_type_swz(dst_full
->RGB
.Arg
[arg
].Swizzle
)
373 if (dst_full
->RGB
.Arg
[arg
].Source
== srcp_src
)
374 dst_full
->RGB
.Arg
[arg
].Source
= free_source
;
375 /* We need to do this just in case register
376 * is one of the sources already, but in the
378 else if(dst_full
->RGB
.Arg
[arg
].Source
== free_source
380 dst_full
->RGB
.Arg
[arg
].Source
= srcp_src
;
388 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
/**
 * Fold the alpha half of `alpha` into `rgb`, modifying `rgb` in place
 * (merge_instructions() keeps a copy of rgb around this call).
 *
 * Preconditions, enforced by the two asserts: rgb has no alpha opcode and
 * alpha has no RGB opcode.
 *
 * Steps, in order:
 *  1. merge presubtract sources used by alpha->RGB and alpha->Alpha via
 *     merge_presub_sources();
 *  2. for every source argument of the alpha opcode, look up the register
 *     it reads (from alpha->RGB.Src when the first swizzle channel is < 3,
 *     from alpha->Alpha.Src when it is 3), allocate a slot for it in rgb
 *     with rc_pair_alloc_source(), and copy Swizzle/Abs/Negate;
 *  3. copy opcode, destination index, write masks and saturate;
 *  4. carry over WriteALUResult/ALUResultCompare when alpha writes the ALU
 *     result.
 *
 * NOTE(review): the return statements, the declaration of `source`, the
 * assignments to srcrgb/srcalpha and the failure branches are not visible
 * in this listing - confirm against the full source.
 */
389 static int destructive_merge_instructions(
390 struct rc_pair_instruction
* rgb
,
391 struct rc_pair_instruction
* alpha
)
393 const struct rc_opcode_info
* opcode
;
395 assert(rgb
->Alpha
.Opcode
== RC_OPCODE_NOP
);
396 assert(alpha
->RGB
.Opcode
== RC_OPCODE_NOP
);
398 /* Presubtract registers need to be merged first so that registers
399 * needed by the presubtract operation can be placed in src0 and/or
402 /* Merge the rgb presubtract registers. */
403 if (alpha
->RGB
.Src
[RC_PAIR_PRESUB_SRC
].Used
) {
404 if (!merge_presub_sources(rgb
, alpha
->RGB
, RC_SOURCE_RGB
)) {
408 /* Merge the alpha presubtract registers */
409 if (alpha
->Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Used
) {
410 if(!merge_presub_sources(rgb
, alpha
->Alpha
, RC_SOURCE_ALPHA
)){
415 /* Copy alpha args into rgb */
416 opcode
= rc_get_opcode_info(alpha
->Alpha
.Opcode
);
418 for(unsigned int arg
= 0; arg
< opcode
->NumSrcRegs
; ++arg
) {
419 unsigned int srcrgb
= 0;
420 unsigned int srcalpha
= 0;
421 unsigned int oldsrc
= alpha
->Alpha
.Arg
[arg
].Source
;
422 rc_register_file file
= 0;
423 unsigned int index
= 0;
426 if (GET_SWZ(alpha
->Alpha
.Arg
[arg
].Swizzle
, 0) < 3) {
428 file
= alpha
->RGB
.Src
[oldsrc
].File
;
429 index
= alpha
->RGB
.Src
[oldsrc
].Index
;
430 } else if (GET_SWZ(alpha
->Alpha
.Arg
[arg
].Swizzle
, 0) < 4) {
432 file
= alpha
->Alpha
.Src
[oldsrc
].File
;
433 index
= alpha
->Alpha
.Src
[oldsrc
].Index
;
436 source
= rc_pair_alloc_source(rgb
, srcrgb
, srcalpha
, file
, index
);
440 rgb
->Alpha
.Arg
[arg
].Source
= source
;
441 rgb
->Alpha
.Arg
[arg
].Swizzle
= alpha
->Alpha
.Arg
[arg
].Swizzle
;
442 rgb
->Alpha
.Arg
[arg
].Abs
= alpha
->Alpha
.Arg
[arg
].Abs
;
443 rgb
->Alpha
.Arg
[arg
].Negate
= alpha
->Alpha
.Arg
[arg
].Negate
;
446 /* Copy alpha opcode into rgb */
447 rgb
->Alpha
.Opcode
= alpha
->Alpha
.Opcode
;
448 rgb
->Alpha
.DestIndex
= alpha
->Alpha
.DestIndex
;
449 rgb
->Alpha
.WriteMask
= alpha
->Alpha
.WriteMask
;
450 rgb
->Alpha
.OutputWriteMask
= alpha
->Alpha
.OutputWriteMask
;
451 rgb
->Alpha
.DepthWriteMask
= alpha
->Alpha
.DepthWriteMask
;
452 rgb
->Alpha
.Saturate
= alpha
->Alpha
.Saturate
;
454 /* Merge ALU result writing */
455 if (alpha
->WriteALUResult
) {
456 if (rgb
->WriteALUResult
)
459 rgb
->WriteALUResult
= alpha
->WriteALUResult
;
460 rgb
->ALUResultCompare
= alpha
->ALUResultCompare
;
467 * Try to merge the given instructions into the rgb instructions.
469 * Return true on success; on failure, return false, and keep
470 * the instructions untouched.
/**
 * Non-destructive wrapper around destructive_merge_instructions().
 *
 * A pair is rejected up front when one half writes the ALU result while the
 * other half writes an output register. Otherwise rgb is copied into a
 * local backup, the destructive merge is attempted, and the backup is
 * copied back over rgb.
 *
 * NOTE(review): the return statements are not visible in this listing; the
 * restoring memcpy is presumably reached only when the merge failed -
 * confirm.
 */
472 static int merge_instructions(struct rc_pair_instruction
* rgb
, struct rc_pair_instruction
* alpha
)
474 struct rc_pair_instruction backup
;
476 /*Instructions can't write output registers and ALU result at the
478 if ((rgb
->WriteALUResult
&& alpha
->Alpha
.OutputWriteMask
)
479 || (rgb
->RGB
.OutputWriteMask
&& alpha
->WriteALUResult
)) {
482 memcpy(&backup
, rgb
, sizeof(struct rc_pair_instruction
));
484 if (destructive_merge_instructions(rgb
, alpha
))
487 memcpy(rgb
, &backup
, sizeof(struct rc_pair_instruction
));
/**
 * Set the Nop flag on the instruction preceding `emitted` when `emitted`
 * uses a presubtract source (in its RGB or Alpha half) that reads a
 * temporary register whose index equals the destination index written by
 * the previous instruction's RGB or Alpha half.
 *
 * Nothing is checked when the previous instruction is not of type
 * RC_INSTRUCTION_PAIR.
 *
 * NOTE(review): when the previous instruction has no alpha write,
 * prev_alpha_index is set to 1, which is itself a valid temporary index and
 * can match a presubtract source reading temp[1]. If this is meant as a
 * "no write" sentinel it probably should be -1 - confirm.
 * NOTE(review): `index` is unsigned while prev_rgb_index/prev_alpha_index
 * are int, so the comparisons are done after conversion to unsigned.
 * NOTE(review): the fallback assignment for prev_rgb_index and the early
 * returns are not visible in this listing.
 */
491 static void presub_nop(struct rc_instruction
* emitted
) {
492 int prev_rgb_index
, prev_alpha_index
, i
, num_src
;
494 /* We don't need a nop if the previous instruction is a TEX. */
495 if (emitted
->Prev
->Type
!= RC_INSTRUCTION_PAIR
) {
498 if (emitted
->Prev
->U
.P
.RGB
.WriteMask
)
499 prev_rgb_index
= emitted
->Prev
->U
.P
.RGB
.DestIndex
;
502 if (emitted
->Prev
->U
.P
.Alpha
.WriteMask
)
503 prev_alpha_index
= emitted
->Prev
->U
.P
.Alpha
.DestIndex
;
505 prev_alpha_index
= 1;
507 /* Check the previous rgb instruction */
508 if (emitted
->U
.P
.RGB
.Src
[RC_PAIR_PRESUB_SRC
].Used
) {
509 num_src
= rc_presubtract_src_reg_count(
510 emitted
->U
.P
.RGB
.Src
[RC_PAIR_PRESUB_SRC
].Index
);
511 for (i
= 0; i
< num_src
; i
++) {
512 unsigned int index
= emitted
->U
.P
.RGB
.Src
[i
].Index
;
513 if (emitted
->U
.P
.RGB
.Src
[i
].File
== RC_FILE_TEMPORARY
514 && (index
== prev_rgb_index
515 || index
== prev_alpha_index
)) {
516 emitted
->Prev
->U
.P
.Nop
= 1;
522 /* Check the previous alpha instruction. */
523 if (!emitted
->U
.P
.Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Used
)
526 num_src
= rc_presubtract_src_reg_count(
527 emitted
->U
.P
.Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Index
);
528 for (i
= 0; i
< num_src
; i
++) {
529 unsigned int index
= emitted
->U
.P
.Alpha
.Src
[i
].Index
;
530 if(emitted
->U
.P
.Alpha
.Src
[i
].File
== RC_FILE_TEMPORARY
531 && (index
== prev_rgb_index
|| index
== prev_alpha_index
)) {
532 emitted
->Prev
->U
.P
.Nop
= 1;
/**
 * Rewrite one reader argument after the value it reads was moved from an
 * RGB channel to the alpha channel.
 *
 * Each of the three RGB swizzle slots of `arg` that selects old_swz is
 * changed to RC_SWIZZLE_W. An alpha source slot for (old_file, new_index)
 * is allocated in `inst` with rc_pair_alloc_source() and stored in
 * arg->Source. A negative slot means the conversion was not actually
 * possible (see the in-body comment).
 *
 * NOTE(review): the old_swz parameter, the local declarations and the
 * handling of the negative-slot case are not visible in this listing.
 */
538 static void rgb_to_alpha_remap (
539 struct rc_instruction
* inst
,
540 struct rc_pair_instruction_arg
* arg
,
541 rc_register_file old_file
,
543 unsigned int new_index
)
548 for (i
= 0; i
< 3; i
++) {
549 if (get_swz(arg
->Swizzle
, i
) == old_swz
) {
550 SET_SWZ(arg
->Swizzle
, i
, RC_SWIZZLE_W
);
553 new_src_index
= rc_pair_alloc_source(&inst
->U
.P
, 0, 1,
554 old_file
, new_index
);
555 /* This conversion is not possible, we must have made a mistake in
556 * is_rgb_to_alpha_possible. */
557 if (new_src_index
< 0) {
562 arg
->Source
= new_src_index
;
/**
 * Opcode predicate used by is_rgb_to_alpha_possible(): a reader whose RGB
 * or Alpha opcode makes this return zero aborts the RGB->alpha conversion.
 * NOTE(review): the body is not visible in this listing.
 */
565 static int can_remap(unsigned int opcode
)
/**
 * Opcode predicate used by convert_rgb_to_alpha(): it is applied to both
 * the RGB and the Alpha opcode of the candidate instruction before the
 * conversion is attempted.
 * NOTE(review): the body is not visible in this listing.
 */
576 static int can_convert_opcode_to_alpha(unsigned int opcode
)
/**
 * Reader callback that schedule_block() passes to rc_get_readers().
 *
 * Sets reader_data->Abort when the reader `inst` cannot take part in an
 * RGB->alpha conversion of the value it reads:
 *  - its RGB or Alpha opcode is rejected by can_remap();
 *  - the argument reads the presubtract source;
 *  - the argument reads more than one channel (chan_count > 1);
 *  - too many alpha sources are already in use (alpha_sources > 2).
 *
 * NOTE(review): the userdata parameter, the declaration of `i`, the
 * counting statements inside both loops and the returns after each Abort
 * are not visible in this listing.
 */
591 static void is_rgb_to_alpha_possible(
593 struct rc_instruction
* inst
,
594 struct rc_pair_instruction_arg
* arg
,
595 struct rc_pair_instruction_source
* src
)
597 unsigned int chan_count
= 0;
598 unsigned int alpha_sources
= 0;
600 struct rc_reader_data
* reader_data
= userdata
;
602 if (!can_remap(inst
->U
.P
.RGB
.Opcode
)
603 || !can_remap(inst
->U
.P
.Alpha
.Opcode
)) {
604 reader_data
->Abort
= 1;
611 /* XXX There are some cases where we can still do the conversion if
612 * a reader reads from a presubtract source, but for now we'll prevent
614 if (arg
->Source
== RC_PAIR_PRESUB_SRC
) {
615 reader_data
->Abort
= 1;
619 /* Make sure the source only reads from one component.
620 * XXX We should allow the source to read from the same component twice.
621 * XXX If the index we will be converting to is the same as the
622 * current index, then it is OK to read from more than one component.
624 for (i
= 0; i
< 3; i
++) {
625 rc_swizzle swz
= get_swz(arg
->Swizzle
, i
);
637 if (chan_count
> 1) {
638 reader_data
->Abort
= 1;
642 /* Make sure there are enough alpha sources.
643 * XXX If we know what register all the readers are going
644 * to be remapped to, then in some situations we can still do
645 * the substitution, even if all 3 alpha sources are being used.*/
646 for (i
= 0; i
< 3; i
++) {
647 if (inst
->U
.P
.Alpha
.Src
[i
].Used
) {
651 if (alpha_sources
> 2) {
652 reader_data
->Abort
= 1;
/**
 * Try to turn an RGB-only instruction into an alpha instruction so that it
 * can be paired with another RGB instruction.
 *
 * Early checks visible here: the global reader scan must not have aborted,
 * the RGB half must write something, both opcodes must pass
 * can_convert_opcode_to_alpha(), exactly one value must be written
 * (assert) and that value must be non-NULL.
 *
 * The search for a destination starts at the current RGB.DestIndex and
 * moves upward through the temporaries, looking at channel 3 of each index
 * via get_reg_valuep(). Once an index is chosen, the tracked value is moved
 * to the new slot, the RGB sub-instruction is copied into the Alpha half
 * (WriteMask forced to RC_MASK_W, each argument's swizzle collapsed to its
 * first used channel), the RGB half is reset to RC_OPCODE_NOP with zeroed
 * fields, and every global reader is rewritten with rgb_to_alpha_remap().
 *
 * emit_one_alu() treats a zero return as "not converted".
 *
 * NOTE(review): the return statements, the declarations of i/j/new_index
 * and the test that decides whether index `i` is usable are not visible in
 * this listing - confirm against the full source.
 */
657 static int convert_rgb_to_alpha(
658 struct schedule_state
* s
,
659 struct schedule_instruction
* sched_inst
)
661 struct rc_pair_instruction
* pair_inst
= &sched_inst
->Instruction
->U
.P
;
662 unsigned int old_mask
= pair_inst
->RGB
.WriteMask
;
663 unsigned int old_swz
= rc_mask_to_swizzle(old_mask
);
664 const struct rc_opcode_info
* info
=
665 rc_get_opcode_info(pair_inst
->RGB
.Opcode
);
669 if (sched_inst
->GlobalReaders
.Abort
)
672 if (!pair_inst
->RGB
.WriteMask
)
675 if (!can_convert_opcode_to_alpha(pair_inst
->RGB
.Opcode
)
676 || !can_convert_opcode_to_alpha(pair_inst
->Alpha
.Opcode
)) {
680 assert(sched_inst
->NumWriteValues
== 1);
682 if (!sched_inst
->WriteValues
[0]) {
687 /* We start at the old index, because if we can reuse the same
688 * register and just change the swizzle then it is more likely we
689 * will be able to convert all the readers. */
690 for (i
= pair_inst
->RGB
.DestIndex
; i
< RC_REGISTER_MAX_INDEX
; i
++) {
691 struct reg_value
** new_regvalp
= get_reg_valuep(
692 s
, RC_FILE_TEMPORARY
, i
, 3);
694 struct reg_value
** old_regvalp
=
697 pair_inst
->RGB
.DestIndex
,
698 rc_mask_to_swizzle(old_mask
));
700 *new_regvalp
= *old_regvalp
;
702 new_regvalp
= get_reg_valuep(s
, RC_FILE_TEMPORARY
, i
, 3);
710 pair_inst
->Alpha
.Opcode
= pair_inst
->RGB
.Opcode
;
711 pair_inst
->Alpha
.DestIndex
= new_index
;
712 pair_inst
->Alpha
.WriteMask
= RC_MASK_W
;
713 pair_inst
->Alpha
.Target
= pair_inst
->RGB
.Target
;
714 pair_inst
->Alpha
.OutputWriteMask
= pair_inst
->RGB
.OutputWriteMask
;
715 pair_inst
->Alpha
.DepthWriteMask
= pair_inst
->RGB
.DepthWriteMask
;
716 pair_inst
->Alpha
.Saturate
= pair_inst
->RGB
.Saturate
;
717 memcpy(pair_inst
->Alpha
.Arg
, pair_inst
->RGB
.Arg
,
718 sizeof(pair_inst
->Alpha
.Arg
));
719 /* Move the swizzles into the first chan */
720 for (i
= 0; i
< info
->NumSrcRegs
; i
++) {
722 for (j
= 0; j
< 3; j
++) {
723 unsigned int swz
= get_swz(pair_inst
->Alpha
.Arg
[i
].Swizzle
, j
);
724 if (swz
!= RC_SWIZZLE_UNUSED
) {
725 pair_inst
->Alpha
.Arg
[i
].Swizzle
=
726 rc_init_swizzle(swz
, 1);
731 pair_inst
->RGB
.Opcode
= RC_OPCODE_NOP
;
732 pair_inst
->RGB
.DestIndex
= 0;
733 pair_inst
->RGB
.WriteMask
= 0;
734 pair_inst
->RGB
.Target
= 0;
735 pair_inst
->RGB
.OutputWriteMask
= 0;
736 pair_inst
->RGB
.DepthWriteMask
= 0;
737 pair_inst
->RGB
.Saturate
= 0;
738 memset(pair_inst
->RGB
.Arg
, 0, sizeof(pair_inst
->RGB
.Arg
));
740 for(i
= 0; i
< sched_inst
->GlobalReaders
.ReaderCount
; i
++) {
741 struct rc_reader reader
= sched_inst
->GlobalReaders
.Readers
[i
];
742 rgb_to_alpha_remap(reader
.Inst
, reader
.U
.P
.Arg
,
743 RC_FILE_TEMPORARY
, old_swz
, new_index
);
749 * Find a good ALU instruction or pair of ALU instruction and emit it.
751 * Prefer emitting full ALU instructions, so that when we reach a point
752 * where no full ALU instruction can be emitted, we have more candidates
753 * for RGB/Alpha pairing.
/**
 * Emit one ALU instruction (or one merged RGB+alpha pair) in front of
 * `before`.
 *
 * Selection order visible in the code:
 *  1. the head of ReadyFullALU, if any;
 *  2. otherwise every (RGB, alpha) combination from ReadyRGB x ReadyAlpha
 *     is tried with merge_instructions(); on success both are unlinked from
 *     their lists, the merged RGB instruction is inserted and both are
 *     committed;
 *  3. otherwise, if ReadyRGB holds at least two entries, single-write RGB
 *     instructions are offered to convert_rgb_to_alpha() and a converted
 *     one is moved to the head of ReadyAlpha;
 *  4. otherwise the head of ReadyRGB or ReadyAlpha is emitted on its own.
 *
 * Finally presub_nop() is called on the instruction now preceding `before`.
 *
 * NOTE(review): several `else`, `continue`/`goto` and brace lines are not
 * visible in this listing, so the exact flow between steps 2-4 (including
 * whether pairing is retried after a conversion) cannot be confirmed here.
 */
755 static void emit_one_alu(struct schedule_state
*s
, struct rc_instruction
* before
)
757 struct schedule_instruction
* sinst
;
759 if (s
->ReadyFullALU
) {
760 sinst
= s
->ReadyFullALU
;
761 s
->ReadyFullALU
= s
->ReadyFullALU
->NextReady
;
762 rc_insert_instruction(before
->Prev
, sinst
->Instruction
);
763 commit_alu_instruction(s
, sinst
);
765 struct schedule_instruction
**prgb
;
766 struct schedule_instruction
**palpha
;
767 struct schedule_instruction
*prev
;
769 /* Some pairings might fail because they require too
770 * many source slots; try all possible pairings if necessary */
771 for(prgb
= &s
->ReadyRGB
; *prgb
; prgb
= &(*prgb
)->NextReady
) {
772 for(palpha
= &s
->ReadyAlpha
; *palpha
; palpha
= &(*palpha
)->NextReady
) {
773 struct schedule_instruction
* psirgb
= *prgb
;
774 struct schedule_instruction
* psialpha
= *palpha
;
776 if (!merge_instructions(&psirgb
->Instruction
->U
.P
, &psialpha
->Instruction
->U
.P
))
779 *prgb
= (*prgb
)->NextReady
;
780 *palpha
= (*palpha
)->NextReady
;
781 rc_insert_instruction(before
->Prev
, psirgb
->Instruction
);
782 commit_alu_instruction(s
, psirgb
);
783 commit_alu_instruction(s
, psialpha
);
788 /* No success in pairing, now try to convert one of the RGB
789 * instructions to an Alpha so we can pair it with another RGB.
791 if (s
->ReadyRGB
&& s
->ReadyRGB
->NextReady
) {
792 for(prgb
= &s
->ReadyRGB
; *prgb
; prgb
= &(*prgb
)->NextReady
) {
793 if ((*prgb
)->NumWriteValues
== 1) {
794 struct schedule_instruction
* prgb_next
;
795 if (!convert_rgb_to_alpha(s
, *prgb
))
797 prgb_next
= (*prgb
)->NextReady
;
798 /* Add instruction to the Alpha ready list. */
799 (*prgb
)->NextReady
= s
->ReadyAlpha
;
800 s
->ReadyAlpha
= *prgb
;
801 /* Remove instruction from the RGB ready list.*/
803 prev
->NextReady
= prgb_next
;
805 s
->ReadyRGB
= prgb_next
;
812 /* Still no success in pairing, just take the first RGB
813 * or alpha instruction. */
816 s
->ReadyRGB
= s
->ReadyRGB
->NextReady
;
817 } else if (s
->ReadyAlpha
) {
818 sinst
= s
->ReadyAlpha
;
819 s
->ReadyAlpha
= s
->ReadyAlpha
->NextReady
;
821 /*XXX Something real bad has happened. */
825 rc_insert_instruction(before
->Prev
, sinst
->Instruction
);
826 commit_alu_instruction(s
, sinst
);
829 /* If the instruction we just emitted uses a presubtract value, and
830 * the presubtract sources were written by the previous instruction,
831 * the previous instruction needs a nop. */
832 presub_nop(before
->Prev
);
/**
 * Callback for rc_for_all_reads_chan(): records that s->Current reads the
 * register channel (file, index, chan).
 *
 * A reader node is allocated from the compiler's memory pool and linked
 * onto the value's Readers list; when the slot returned by get_reg_valuep()
 * holds no value yet, a zeroed reg_value is created for it first. A read of
 * a value whose Writer is s->Current itself is treated specially (see the
 * in-body comment). The value is finally appended to Current->ReadValues;
 * rc_error() is raised instead when 12 entries are already stored.
 *
 * NOTE(review): the handling of a NULL slot pointer, the branch structure
 * around the list insertion, and the condition and NumReaders update next
 * to the NumDependencies increment are not visible in this listing.
 */
835 static void scan_read(void * data
, struct rc_instruction
* inst
,
836 rc_register_file file
, unsigned int index
, unsigned int chan
)
838 struct schedule_state
* s
= data
;
839 struct reg_value
** v
= get_reg_valuep(s
, file
, index
, chan
);
840 struct reg_value_reader
* reader
;
845 if (*v
&& (*v
)->Writer
== s
->Current
) {
846 /* The instruction reads and writes to a register component.
847 * In this case, we only want to increment dependencies by one.
852 DBG("%i: read %i[%i] chan %i\n", s
->Current
->Instruction
->IP
, file
, index
, chan
);
854 reader
= memory_pool_malloc(&s
->C
->Pool
, sizeof(*reader
));
855 reader
->Reader
= s
->Current
;
857 /* In this situation, the instruction reads from a register
858 * that hasn't been written to or read from in the current
860 *v
= memory_pool_malloc(&s
->C
->Pool
, sizeof(struct reg_value
));
861 memset(*v
, 0, sizeof(struct reg_value
));
862 (*v
)->Readers
= reader
;
864 reader
->Next
= (*v
)->Readers
;
865 (*v
)->Readers
= reader
;
866 /* Only update the current instruction's dependencies if the
867 * register it reads from has been written to in this block. */
869 s
->Current
->NumDependencies
++;
874 if (s
->Current
->NumReadValues
>= 12) {
875 rc_error(s
->C
, "%s: NumReadValues overflow\n", __FUNCTION__
);
877 s
->Current
->ReadValues
[s
->Current
->NumReadValues
++] = *v
;
/**
 * Callback for rc_for_all_writes_chan(): records that s->Current writes the
 * register channel (file, index, chan).
 *
 * A zeroed reg_value is allocated from the compiler's memory pool with
 * Writer set to s->Current and appended to Current->WriteValues; rc_error()
 * is raised instead when 4 entries are already stored.
 *
 * NOTE(review): how the new value is linked into the slot returned by
 * get_reg_valuep() (and behind any previous value), and the condition under
 * which NumDependencies is incremented, are not visible in this listing.
 */
881 static void scan_write(void * data
, struct rc_instruction
* inst
,
882 rc_register_file file
, unsigned int index
, unsigned int chan
)
884 struct schedule_state
* s
= data
;
885 struct reg_value
** pv
= get_reg_valuep(s
, file
, index
, chan
);
886 struct reg_value
* newv
;
891 DBG("%i: write %i[%i] chan %i\n", s
->Current
->Instruction
->IP
, file
, index
, chan
);
893 newv
= memory_pool_malloc(&s
->C
->Pool
, sizeof(*newv
));
894 memset(newv
, 0, sizeof(*newv
));
896 newv
->Writer
= s
->Current
;
900 s
->Current
->NumDependencies
++;
905 if (s
->Current
->NumWriteValues
>= 4) {
906 rc_error(s
->C
, "%s: NumWriteValues overflow\n", __FUNCTION__
);
908 s
->Current
->WriteValues
[s
->Current
->NumWriteValues
++] = newv
;
/**
 * Reader callback that unconditionally sets reader_data->Abort.
 * schedule_block() passes it to rc_get_readers() together with
 * is_rgb_to_alpha_possible(); judging by the rc_src_register parameter it
 * is the callback for readers that are not pair instructions.
 * NOTE(review): the userdata parameter line is not visible in this listing.
 */
912 static void is_rgb_to_alpha_possible_normal(
914 struct rc_instruction
* inst
,
915 struct rc_src_register
* src
)
917 struct rc_reader_data
* reader_data
= userdata
;
918 reader_data
->Abort
= 1;
/**
 * Schedule the instructions in the range [begin, end).
 *
 * Scan phase: for every instruction a zeroed schedule_instruction is
 * allocated from the compiler's pool and made s.Current; writes are scanned
 * before reads (see the in-body comment), an instruction without
 * dependencies is queued immediately, and global readers are collected with
 * ExitOnAbort set for a possible later RGB->alpha conversion.
 *
 * Emit phase: the range is unlinked from the program (begin->Prev is
 * connected directly to end) and instructions are re-inserted in front of
 * `end`: emit_all_tex() for TEX instructions, then emit_one_alu()
 * repeatedly while an ALU ready list is non-empty and no compiler error is
 * set.
 *
 * NOTE(review): the assignment of s.C, the outer emit loop header and the
 * condition guarding emit_all_tex() are not visible in this listing.
 */
922 static void schedule_block(struct r300_fragment_program_compiler
* c
,
923 struct rc_instruction
* begin
, struct rc_instruction
* end
)
925 struct schedule_state s
;
928 memset(&s
, 0, sizeof(s
));
931 /* Scan instructions for data dependencies */
933 for(struct rc_instruction
* inst
= begin
; inst
!= end
; inst
= inst
->Next
) {
934 s
.Current
= memory_pool_malloc(&c
->Base
.Pool
, sizeof(*s
.Current
));
935 memset(s
.Current
, 0, sizeof(struct schedule_instruction
));
937 s
.Current
->Instruction
= inst
;
940 DBG("%i: Scanning\n", inst
->IP
);
942 /* The order of things here is subtle and maybe slightly
943 * counter-intuitive, to account for the case where an
944 * instruction writes to the same register as it reads
946 rc_for_all_writes_chan(inst
, &scan_write
, &s
);
947 rc_for_all_reads_chan(inst
, &scan_read
, &s
);
949 DBG("%i: Has %i dependencies\n", inst
->IP
, s
.Current
->NumDependencies
);
951 if (!s
.Current
->NumDependencies
)
952 instruction_ready(&s
, s
.Current
);
954 /* Get global readers for possible RGB->Alpha conversion. */
955 s
.Current
->GlobalReaders
.ExitOnAbort
= 1;
956 rc_get_readers(s
.C
, inst
, &s
.Current
->GlobalReaders
,
957 is_rgb_to_alpha_possible_normal
,
958 is_rgb_to_alpha_possible
, NULL
);
961 /* Temporarily unlink all instructions */
962 begin
->Prev
->Next
= end
;
963 end
->Prev
= begin
->Prev
;
965 /* Schedule instructions back */
967 (s
.ReadyTEX
|| s
.ReadyRGB
|| s
.ReadyAlpha
|| s
.ReadyFullALU
)) {
969 emit_all_tex(&s
, end
);
971 while(!s
.C
->Error
&& (s
.ReadyFullALU
|| s
.ReadyRGB
|| s
.ReadyAlpha
))
972 emit_one_alu(&s
, end
);
/**
 * For an instruction of type RC_INSTRUCTION_NORMAL, return the
 * IsFlowControl flag of its opcode info.
 * NOTE(review): the value returned for other instruction types is not
 * visible in this listing (presumably 0) - confirm.
 */
976 static int is_controlflow(struct rc_instruction
* inst
)
978 if (inst
->Type
== RC_INSTRUCTION_NORMAL
) {
979 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
980 return opcode
->IsFlowControl
;
/**
 * Entry point of the pair scheduling pass.
 *
 * `cc` is used as a struct r300_fragment_program_compiler. The program's
 * instruction list is walked from its first instruction until the list
 * head is reached again; control-flow instructions act as block boundaries,
 * and each run of non-control-flow instructions [first, inst) is handed to
 * schedule_block().
 *
 * NOTE(review): the statements that skip a control-flow instruction, set
 * `first` and advance `inst` are not visible in this listing. `user` is not
 * referenced in the visible code.
 */
985 void rc_pair_schedule(struct radeon_compiler
*cc
, void *user
)
987 struct schedule_state s
;
989 struct r300_fragment_program_compiler
*c
= (struct r300_fragment_program_compiler
*)cc
;
990 struct rc_instruction
* inst
= c
->Base
.Program
.Instructions
.Next
;
992 memset(&s
, 0, sizeof(s
));
994 while(inst
!= &c
->Base
.Program
.Instructions
) {
995 struct rc_instruction
* first
;
997 if (is_controlflow(inst
)) {
1004 while(inst
!= &c
->Base
.Program
.Instructions
&& !is_controlflow(inst
))
1007 DBG("Schedule one block\n");
1008 schedule_block(c
, first
, inst
);