2 * Copyright (C) 2009 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "radeon_program_pair.h"
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
35 #include "radeon_list.h"
36 #include "radeon_variable.h"
38 #include "util/u_debug.h"
42 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
44 struct schedule_instruction
{
45 struct rc_instruction
* Instruction
;
47 /** Next instruction in the linked list of ready instructions. */
48 struct schedule_instruction
*NextReady
;
50 /** Values that this instruction reads and writes */
51 struct reg_value
* WriteValues
[4];
52 struct reg_value
* ReadValues
[12];
53 unsigned int NumWriteValues
:3;
54 unsigned int NumReadValues
:4;
57 * Number of (read and write) dependencies that must be resolved before
58 * this instruction can be scheduled.
60 unsigned int NumDependencies
:5;
62 /** List of all readers (see rc_get_readers() for the definition of
63 * "all readers"), even those outside the basic block this instruction
65 struct rc_reader_data GlobalReaders
;
67 /** If the scheduler has paired an RGB and an Alpha instruction together,
68 * PairedInst references the alpha instruction's dependency information.
70 struct schedule_instruction
* PairedInst
;
72 /** This scheduler uses the value of Score to determine which
73 * instruction to schedule. Instructions with a higher value of Score
74 * will be scheduled first. */
77 /** The number of components that read from a TEX instruction. */
78 unsigned TexReadCount
;
80 /** For TEX instructions a list of readers */
81 struct rc_list
* TexReaders
;
86 * Used to keep track of which instructions read a value.
88 struct reg_value_reader
{
89 struct schedule_instruction
*Reader
;
90 struct reg_value_reader
*Next
;
94 * Used to keep track which values are stored in each component of a
98 struct schedule_instruction
* Writer
;
101 * Unordered linked list of instructions that read from this value.
102 * When this value becomes available, we increase all readers'
105 struct reg_value_reader
*Readers
;
108 * Number of readers of this value. This is decremented each time
109 * a reader of the value is committed.
110 * When the reader count reaches zero, the dependency count
111 * of the instruction writing \ref Next is decremented.
113 unsigned int NumReaders
;
115 struct reg_value
*Next
; /**< Pointer to the next value to be written to the same register */
118 struct register_state
{
119 struct reg_value
* Values
[4];
123 struct rc_instruciont
* Inst
;
124 unsigned int OldIndex
:(RC_REGISTER_INDEX_BITS
+1);
125 unsigned int OldSwizzle
:3;
126 unsigned int NewIndex
:(RC_REGISTER_INDEX_BITS
+1);
127 unsigned int NewSwizzle
:3;
128 unsigned int OnlyTexReads
:1;
129 struct remap_reg
* Next
;
132 struct schedule_state
{
133 struct radeon_compiler
* C
;
134 struct schedule_instruction
* Current
;
135 /** Array of the previous writers of Current's destination register
136 * indexed by channel. */
137 struct schedule_instruction
* PrevWriter
[4];
139 struct register_state Temporary
[RC_REGISTER_MAX_INDEX
];
142 * Linked lists of instructions that can be scheduled right now,
143 * based on which ALU/TEX resources they require.
146 struct schedule_instruction
*ReadyFullALU
;
147 struct schedule_instruction
*ReadyRGB
;
148 struct schedule_instruction
*ReadyAlpha
;
149 struct schedule_instruction
*ReadyTEX
;
151 struct rc_list
*PendingTEX
;
153 void (*CalcScore
)(struct schedule_instruction
*);
155 unsigned PrevBlockHasTex
:1;
160 static struct reg_value
** get_reg_valuep(struct schedule_state
* s
,
161 rc_register_file file
, unsigned int index
, unsigned int chan
)
163 if (file
!= RC_FILE_TEMPORARY
)
166 if (index
>= RC_REGISTER_MAX_INDEX
) {
167 rc_error(s
->C
, "%s: index %i out of bounds\n", __FUNCTION__
, index
);
171 return &s
->Temporary
[index
].Values
[chan
];
174 static unsigned get_tex_read_count(struct schedule_instruction
* sinst
)
176 unsigned tex_read_count
= sinst
->TexReadCount
;
177 if (sinst
->PairedInst
) {
178 tex_read_count
+= sinst
->PairedInst
->TexReadCount
;
180 return tex_read_count
;
184 static void print_list(struct schedule_instruction
* sinst
)
186 struct schedule_instruction
* ptr
;
187 for (ptr
= sinst
; ptr
; ptr
=ptr
->NextReady
) {
188 unsigned tex_read_count
= get_tex_read_count(ptr
);
189 unsigned score
= sinst
->Score
;
190 fprintf(stderr
,"%u (%d) [%u],", ptr
->Instruction
->IP
, score
,
193 fprintf(stderr
, "\n");
197 static void remove_inst_from_list(struct schedule_instruction
** list
,
198 struct schedule_instruction
* inst
)
200 struct schedule_instruction
* prev
= NULL
;
201 struct schedule_instruction
* list_ptr
;
202 for (list_ptr
= *list
; list_ptr
; prev
= list_ptr
,
203 list_ptr
= list_ptr
->NextReady
) {
204 if (list_ptr
== inst
) {
206 prev
->NextReady
= inst
->NextReady
;
208 *list
= inst
->NextReady
;
210 inst
->NextReady
= NULL
;
216 static void add_inst_to_list(struct schedule_instruction
** list
, struct schedule_instruction
* inst
)
218 inst
->NextReady
= *list
;
222 static void add_inst_to_list_score(struct schedule_instruction
** list
,
223 struct schedule_instruction
* inst
)
225 struct schedule_instruction
* temp
;
226 struct schedule_instruction
* prev
;
233 while(temp
&& inst
->Score
<= temp
->Score
) {
235 temp
= temp
->NextReady
;
239 inst
->NextReady
= temp
;
242 prev
->NextReady
= inst
;
243 inst
->NextReady
= temp
;
247 static void instruction_ready(struct schedule_state
* s
, struct schedule_instruction
* sinst
)
249 DBG("%i is now ready\n", sinst
->Instruction
->IP
);
251 /* Adding Ready TEX instructions to the end of the "Ready List" helps
252 * us emit TEX instructions in blocks without losing our place. */
253 if (sinst
->Instruction
->Type
== RC_INSTRUCTION_NORMAL
)
254 add_inst_to_list_score(&s
->ReadyTEX
, sinst
);
255 else if (sinst
->Instruction
->U
.P
.Alpha
.Opcode
== RC_OPCODE_NOP
)
256 add_inst_to_list_score(&s
->ReadyRGB
, sinst
);
257 else if (sinst
->Instruction
->U
.P
.RGB
.Opcode
== RC_OPCODE_NOP
)
258 add_inst_to_list_score(&s
->ReadyAlpha
, sinst
);
260 add_inst_to_list_score(&s
->ReadyFullALU
, sinst
);
263 static void decrease_dependencies(struct schedule_state
* s
, struct schedule_instruction
* sinst
)
265 assert(sinst
->NumDependencies
> 0);
266 sinst
->NumDependencies
--;
267 if (!sinst
->NumDependencies
)
268 instruction_ready(s
, sinst
);
271 /* These functions provide different heuristics for scheduling instructions.
272 * The default is calc_score_readers. */
276 static void calc_score_zero(struct schedule_instruction
* sinst
)
281 static void calc_score_deps(struct schedule_instruction
* sinst
)
285 for (i
= 0; i
< sinst
->NumWriteValues
; i
++) {
286 struct reg_value
* v
= sinst
->WriteValues
[i
];
288 struct reg_value_reader
* r
;
289 for (r
= v
->Readers
; r
; r
= r
->Next
) {
290 if (r
->Reader
->NumDependencies
== 1) {
293 sinst
->Score
+= r
->Reader
->NumDependencies
;
301 #define NO_OUTPUT_SCORE (1 << 24)
303 static void score_no_output(struct schedule_instruction
* sinst
)
305 assert(sinst
->Instruction
->Type
!= RC_INSTRUCTION_NORMAL
);
306 if (!sinst
->Instruction
->U
.P
.RGB
.OutputWriteMask
&&
307 !sinst
->Instruction
->U
.P
.Alpha
.OutputWriteMask
) {
308 if (sinst
->PairedInst
) {
309 if (!sinst
->PairedInst
->Instruction
->U
.P
.
311 && !sinst
->PairedInst
->Instruction
->U
.P
.
312 Alpha
.OutputWriteMask
) {
313 sinst
->Score
|= NO_OUTPUT_SCORE
;
317 sinst
->Score
|= NO_OUTPUT_SCORE
;
322 #define PAIRED_SCORE (1 << 16)
324 static void calc_score_r300(struct schedule_instruction
* sinst
)
328 if (sinst
->Instruction
->Type
== RC_INSTRUCTION_NORMAL
) {
333 score_no_output(sinst
);
335 if (sinst
->PairedInst
) {
336 sinst
->Score
|= PAIRED_SCORE
;
340 for (src_idx
= 0; src_idx
< 4; src_idx
++) {
341 sinst
->Score
+= sinst
->Instruction
->U
.P
.RGB
.Src
[src_idx
].Used
+
342 sinst
->Instruction
->U
.P
.Alpha
.Src
[src_idx
].Used
;
346 #define NO_READ_TEX_SCORE (1 << 16)
348 static void calc_score_readers(struct schedule_instruction
* sinst
)
350 if (sinst
->Instruction
->Type
== RC_INSTRUCTION_NORMAL
) {
353 sinst
->Score
= sinst
->NumReadValues
;
354 if (sinst
->PairedInst
) {
355 sinst
->Score
+= sinst
->PairedInst
->NumReadValues
;
357 if (get_tex_read_count(sinst
) == 0) {
358 sinst
->Score
|= NO_READ_TEX_SCORE
;
360 score_no_output(sinst
);
365 * This function decreases the dependencies of the next instruction that
366 * wants to write to each of sinst's read values.
368 static void commit_update_reads(struct schedule_state
* s
,
369 struct schedule_instruction
* sinst
){
371 for(i
= 0; i
< sinst
->NumReadValues
; ++i
) {
372 struct reg_value
* v
= sinst
->ReadValues
[i
];
373 assert(v
->NumReaders
> 0);
375 if (!v
->NumReaders
) {
377 decrease_dependencies(s
, v
->Next
->Writer
);
381 if (sinst
->PairedInst
) {
382 commit_update_reads(s
, sinst
->PairedInst
);
386 static void commit_update_writes(struct schedule_state
* s
,
387 struct schedule_instruction
* sinst
){
389 for(i
= 0; i
< sinst
->NumWriteValues
; ++i
) {
390 struct reg_value
* v
= sinst
->WriteValues
[i
];
392 for(struct reg_value_reader
* r
= v
->Readers
; r
; r
= r
->Next
) {
393 decrease_dependencies(s
, r
->Reader
);
396 /* This happens in instruction sequences of the type
399 * See also the subtlety in how instructions that both
400 * read and write the same register are scanned.
403 decrease_dependencies(s
, v
->Next
->Writer
);
406 if (sinst
->PairedInst
) {
407 commit_update_writes(s
, sinst
->PairedInst
);
411 static void notify_sem_wait(struct schedule_state
*s
)
413 struct rc_list
* pend_ptr
;
414 for (pend_ptr
= s
->PendingTEX
; pend_ptr
; pend_ptr
= pend_ptr
->Next
) {
415 struct rc_list
* read_ptr
;
416 struct schedule_instruction
* pending
= pend_ptr
->Item
;
417 for (read_ptr
= pending
->TexReaders
; read_ptr
;
418 read_ptr
= read_ptr
->Next
) {
419 struct schedule_instruction
* reader
= read_ptr
->Item
;
420 reader
->TexReadCount
--;
423 s
->PendingTEX
= NULL
;
426 static void commit_alu_instruction(struct schedule_state
* s
, struct schedule_instruction
* sinst
)
428 DBG("%i: commit score = %d\n", sinst
->Instruction
->IP
, sinst
->Score
);
430 commit_update_reads(s
, sinst
);
432 commit_update_writes(s
, sinst
);
434 if (get_tex_read_count(sinst
) > 0) {
435 sinst
->Instruction
->U
.P
.SemWait
= 1;
441 * Emit all ready texture instructions in a single block.
443 * Emit as a single block to (hopefully) sample many textures in parallel,
444 * and to avoid hardware indirections on R300.
446 static void emit_all_tex(struct schedule_state
* s
, struct rc_instruction
* before
)
448 struct schedule_instruction
*readytex
;
449 struct rc_instruction
* inst_begin
;
454 /* Node marker for R300 */
455 inst_begin
= rc_insert_new_instruction(s
->C
, before
->Prev
);
456 inst_begin
->U
.I
.Opcode
= RC_OPCODE_BEGIN_TEX
;
458 /* Link texture instructions back in */
459 readytex
= s
->ReadyTEX
;
461 rc_insert_instruction(before
->Prev
, readytex
->Instruction
);
462 DBG("%i: commit TEX reads\n", readytex
->Instruction
->IP
);
464 /* All of the TEX instructions in the same TEX block have
465 * their source registers read from before any of the
466 * instructions in that block write to their destination
467 * registers. This means that when we commit a TEX
468 * instruction, any other TEX instruction that wants to write
469 * to one of the committed instruction's source register can be
470 * marked as ready and should be emitted in the same TEX
471 * block. This prevents the following sequence from being
472 * emitted in two different TEX blocks:
473 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
474 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
476 commit_update_reads(s
, readytex
);
477 readytex
= readytex
->NextReady
;
479 readytex
= s
->ReadyTEX
;
482 DBG("%i: commit TEX writes\n", readytex
->Instruction
->IP
);
483 commit_update_writes(s
, readytex
);
484 /* Set semaphore bits for last TEX instruction in the block */
485 if (!readytex
->NextReady
) {
486 readytex
->Instruction
->U
.I
.TexSemAcquire
= 1;
487 readytex
->Instruction
->U
.I
.TexSemWait
= 1;
489 rc_list_add(&s
->PendingTEX
, rc_list(&s
->C
->Pool
, readytex
));
490 readytex
= readytex
->NextReady
;
494 /* This is a helper function for destructive_merge_instructions(). It helps
495 * merge presubtract sources from two instructions and makes sure the
496 * presubtract sources end up in the correct spot. This function assumes that
497 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
498 * but no scalar instruction (alpha).
499 * @return 0 if merging the presubtract sources fails.
500 * @return 1 if merging the presubtract sources succeeds.
502 static int merge_presub_sources(
503 struct rc_pair_instruction
* dst_full
,
504 struct rc_pair_sub_instruction src
,
507 unsigned int srcp_src
, srcp_regs
, is_rgb
, is_alpha
;
508 struct rc_pair_sub_instruction
* dst_sub
;
509 const struct rc_opcode_info
* info
;
511 assert(dst_full
->Alpha
.Opcode
== RC_OPCODE_NOP
);
517 dst_sub
= &dst_full
->RGB
;
519 case RC_SOURCE_ALPHA
:
522 dst_sub
= &dst_full
->Alpha
;
529 info
= rc_get_opcode_info(dst_full
->RGB
.Opcode
);
531 if (dst_sub
->Src
[RC_PAIR_PRESUB_SRC
].Used
)
534 srcp_regs
= rc_presubtract_src_reg_count(
535 src
.Src
[RC_PAIR_PRESUB_SRC
].Index
);
536 for(srcp_src
= 0; srcp_src
< srcp_regs
; srcp_src
++) {
539 unsigned int one_way
= 0;
540 struct rc_pair_instruction_source srcp
= src
.Src
[srcp_src
];
541 struct rc_pair_instruction_source temp
;
543 free_source
= rc_pair_alloc_source(dst_full
, is_rgb
, is_alpha
,
544 srcp
.File
, srcp
.Index
);
546 /* If free_source < 0 then there are no free source
551 temp
= dst_sub
->Src
[srcp_src
];
552 dst_sub
->Src
[srcp_src
] = dst_sub
->Src
[free_source
];
554 /* srcp needs src0 and src1 to be the same */
555 if (free_source
< srcp_src
) {
558 free_source
= rc_pair_alloc_source(dst_full
, is_rgb
,
559 is_alpha
, temp
.File
, temp
.Index
);
564 dst_sub
->Src
[free_source
] = temp
;
567 /* If free_source == srcp_src, then the presubtract
568 * source is already in the correct place. */
569 if (free_source
== srcp_src
)
572 /* Shuffle the sources, so we can put the
573 * presubtract source in the correct place. */
574 for(arg
= 0; arg
< info
->NumSrcRegs
; arg
++) {
575 /*If this arg does not read from an rgb source,
577 if (!(rc_source_type_swz(dst_full
->RGB
.Arg
[arg
].Swizzle
)
582 if (dst_full
->RGB
.Arg
[arg
].Source
== srcp_src
)
583 dst_full
->RGB
.Arg
[arg
].Source
= free_source
;
584 /* We need to do this just in case register
585 * is one of the sources already, but in the
587 else if(dst_full
->RGB
.Arg
[arg
].Source
== free_source
589 dst_full
->RGB
.Arg
[arg
].Source
= srcp_src
;
597 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
598 static int destructive_merge_instructions(
599 struct rc_pair_instruction
* rgb
,
600 struct rc_pair_instruction
* alpha
)
602 const struct rc_opcode_info
* opcode
;
604 assert(rgb
->Alpha
.Opcode
== RC_OPCODE_NOP
);
605 assert(alpha
->RGB
.Opcode
== RC_OPCODE_NOP
);
607 /* Presubtract registers need to be merged first so that registers
608 * needed by the presubtract operation can be placed in src0 and/or
611 /* Merge the rgb presubtract registers. */
612 if (alpha
->RGB
.Src
[RC_PAIR_PRESUB_SRC
].Used
) {
613 if (!merge_presub_sources(rgb
, alpha
->RGB
, RC_SOURCE_RGB
)) {
617 /* Merge the alpha presubtract registers */
618 if (alpha
->Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Used
) {
619 if(!merge_presub_sources(rgb
, alpha
->Alpha
, RC_SOURCE_ALPHA
)){
624 /* Copy alpha args into rgb */
625 opcode
= rc_get_opcode_info(alpha
->Alpha
.Opcode
);
627 for(unsigned int arg
= 0; arg
< opcode
->NumSrcRegs
; ++arg
) {
628 unsigned int srcrgb
= 0;
629 unsigned int srcalpha
= 0;
630 unsigned int oldsrc
= alpha
->Alpha
.Arg
[arg
].Source
;
631 rc_register_file file
= 0;
632 unsigned int index
= 0;
635 if (GET_SWZ(alpha
->Alpha
.Arg
[arg
].Swizzle
, 0) < 3) {
637 file
= alpha
->RGB
.Src
[oldsrc
].File
;
638 index
= alpha
->RGB
.Src
[oldsrc
].Index
;
639 } else if (GET_SWZ(alpha
->Alpha
.Arg
[arg
].Swizzle
, 0) < 4) {
641 file
= alpha
->Alpha
.Src
[oldsrc
].File
;
642 index
= alpha
->Alpha
.Src
[oldsrc
].Index
;
645 source
= rc_pair_alloc_source(rgb
, srcrgb
, srcalpha
, file
, index
);
649 rgb
->Alpha
.Arg
[arg
].Source
= source
;
650 rgb
->Alpha
.Arg
[arg
].Swizzle
= alpha
->Alpha
.Arg
[arg
].Swizzle
;
651 rgb
->Alpha
.Arg
[arg
].Abs
= alpha
->Alpha
.Arg
[arg
].Abs
;
652 rgb
->Alpha
.Arg
[arg
].Negate
= alpha
->Alpha
.Arg
[arg
].Negate
;
655 /* Copy alpha opcode into rgb */
656 rgb
->Alpha
.Opcode
= alpha
->Alpha
.Opcode
;
657 rgb
->Alpha
.DestIndex
= alpha
->Alpha
.DestIndex
;
658 rgb
->Alpha
.WriteMask
= alpha
->Alpha
.WriteMask
;
659 rgb
->Alpha
.OutputWriteMask
= alpha
->Alpha
.OutputWriteMask
;
660 rgb
->Alpha
.DepthWriteMask
= alpha
->Alpha
.DepthWriteMask
;
661 rgb
->Alpha
.Saturate
= alpha
->Alpha
.Saturate
;
662 rgb
->Alpha
.Omod
= alpha
->Alpha
.Omod
;
664 /* Merge ALU result writing */
665 if (alpha
->WriteALUResult
) {
666 if (rgb
->WriteALUResult
)
669 rgb
->WriteALUResult
= alpha
->WriteALUResult
;
670 rgb
->ALUResultCompare
= alpha
->ALUResultCompare
;
674 rgb
->SemWait
|= alpha
->SemWait
;
680 * Try to merge the given instructions into the rgb instructions.
682 * Return true on success; on failure, return false, and keep
683 * the instructions untouched.
685 static int merge_instructions(struct rc_pair_instruction
* rgb
, struct rc_pair_instruction
* alpha
)
687 struct rc_pair_instruction backup
;
689 /*Instructions can't write output registers and ALU result at the
691 if ((rgb
->WriteALUResult
&& alpha
->Alpha
.OutputWriteMask
)
692 || (rgb
->RGB
.OutputWriteMask
&& alpha
->WriteALUResult
)) {
696 /* Writing output registers in the middle of shaders is slow, so
697 * we don't want to pair output writes with temp writes. */
698 if ((rgb
->RGB
.OutputWriteMask
&& !alpha
->Alpha
.OutputWriteMask
)
699 || (!rgb
->RGB
.OutputWriteMask
&& alpha
->Alpha
.OutputWriteMask
)) {
703 memcpy(&backup
, rgb
, sizeof(struct rc_pair_instruction
));
705 if (destructive_merge_instructions(rgb
, alpha
))
708 memcpy(rgb
, &backup
, sizeof(struct rc_pair_instruction
));
712 static void presub_nop(struct rc_instruction
* emitted
) {
713 int prev_rgb_index
, prev_alpha_index
, i
, num_src
;
715 /* We don't need a nop if the previous instruction is a TEX. */
716 if (emitted
->Prev
->Type
!= RC_INSTRUCTION_PAIR
) {
719 if (emitted
->Prev
->U
.P
.RGB
.WriteMask
)
720 prev_rgb_index
= emitted
->Prev
->U
.P
.RGB
.DestIndex
;
723 if (emitted
->Prev
->U
.P
.Alpha
.WriteMask
)
724 prev_alpha_index
= emitted
->Prev
->U
.P
.Alpha
.DestIndex
;
726 prev_alpha_index
= 1;
728 /* Check the previous rgb instruction */
729 if (emitted
->U
.P
.RGB
.Src
[RC_PAIR_PRESUB_SRC
].Used
) {
730 num_src
= rc_presubtract_src_reg_count(
731 emitted
->U
.P
.RGB
.Src
[RC_PAIR_PRESUB_SRC
].Index
);
732 for (i
= 0; i
< num_src
; i
++) {
733 unsigned int index
= emitted
->U
.P
.RGB
.Src
[i
].Index
;
734 if (emitted
->U
.P
.RGB
.Src
[i
].File
== RC_FILE_TEMPORARY
735 && (index
== prev_rgb_index
736 || index
== prev_alpha_index
)) {
737 emitted
->Prev
->U
.P
.Nop
= 1;
743 /* Check the previous alpha instruction. */
744 if (!emitted
->U
.P
.Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Used
)
747 num_src
= rc_presubtract_src_reg_count(
748 emitted
->U
.P
.Alpha
.Src
[RC_PAIR_PRESUB_SRC
].Index
);
749 for (i
= 0; i
< num_src
; i
++) {
750 unsigned int index
= emitted
->U
.P
.Alpha
.Src
[i
].Index
;
751 if(emitted
->U
.P
.Alpha
.Src
[i
].File
== RC_FILE_TEMPORARY
752 && (index
== prev_rgb_index
|| index
== prev_alpha_index
)) {
753 emitted
->Prev
->U
.P
.Nop
= 1;
759 static void rgb_to_alpha_remap (
760 struct rc_instruction
* inst
,
761 struct rc_pair_instruction_arg
* arg
,
762 rc_register_file old_file
,
764 unsigned int new_index
)
769 for (i
= 0; i
< 3; i
++) {
770 if (get_swz(arg
->Swizzle
, i
) == old_swz
) {
771 SET_SWZ(arg
->Swizzle
, i
, RC_SWIZZLE_W
);
774 new_src_index
= rc_pair_alloc_source(&inst
->U
.P
, 0, 1,
775 old_file
, new_index
);
776 /* This conversion is not possible, we must have made a mistake in
777 * is_rgb_to_alpha_possible. */
778 if (new_src_index
< 0) {
783 arg
->Source
= new_src_index
;
786 static int can_remap(unsigned int opcode
)
797 static int can_convert_opcode_to_alpha(unsigned int opcode
)
812 static void is_rgb_to_alpha_possible(
814 struct rc_instruction
* inst
,
815 struct rc_pair_instruction_arg
* arg
,
816 struct rc_pair_instruction_source
* src
)
818 unsigned int read_chan
= RC_SWIZZLE_UNUSED
;
819 unsigned int alpha_sources
= 0;
821 struct rc_reader_data
* reader_data
= userdata
;
823 if (!can_remap(inst
->U
.P
.RGB
.Opcode
)
824 || !can_remap(inst
->U
.P
.Alpha
.Opcode
)) {
825 reader_data
->Abort
= 1;
832 /* XXX There are some cases where we can still do the conversion if
833 * a reader reads from a presubtract source, but for now we'll prevent
835 if (arg
->Source
== RC_PAIR_PRESUB_SRC
) {
836 reader_data
->Abort
= 1;
840 /* Make sure the source only reads the register component that we
841 * are going to be converting from. It is OK if the instruction uses
842 * this component more than once.
843 * XXX If the index we will be converting to is the same as the
844 * current index, then it is OK to read from more than one component.
846 for (i
= 0; i
< 3; i
++) {
847 rc_swizzle swz
= get_swz(arg
->Swizzle
, i
);
853 if (read_chan
== RC_SWIZZLE_UNUSED
) {
855 } else if (read_chan
!= swz
) {
856 reader_data
->Abort
= 1;
865 /* Make sure there are enough alpha sources.
866 * XXX If we know what register all the readers are going
867 * to be remapped to, then in some situations we can still do
868 * the substitution, even if all 3 alpha sources are being used.*/
869 for (i
= 0; i
< 3; i
++) {
870 if (inst
->U
.P
.Alpha
.Src
[i
].Used
) {
874 if (alpha_sources
> 2) {
875 reader_data
->Abort
= 1;
880 static int convert_rgb_to_alpha(
881 struct schedule_state
* s
,
882 struct schedule_instruction
* sched_inst
)
884 struct rc_pair_instruction
* pair_inst
= &sched_inst
->Instruction
->U
.P
;
885 unsigned int old_mask
= pair_inst
->RGB
.WriteMask
;
886 unsigned int old_swz
= rc_mask_to_swizzle(old_mask
);
887 const struct rc_opcode_info
* info
=
888 rc_get_opcode_info(pair_inst
->RGB
.Opcode
);
892 if (sched_inst
->GlobalReaders
.Abort
)
895 if (!pair_inst
->RGB
.WriteMask
)
898 if (!can_convert_opcode_to_alpha(pair_inst
->RGB
.Opcode
)
899 || !can_convert_opcode_to_alpha(pair_inst
->Alpha
.Opcode
)) {
903 assert(sched_inst
->NumWriteValues
== 1);
905 if (!sched_inst
->WriteValues
[0]) {
910 /* We start at the old index, because if we can reuse the same
911 * register and just change the swizzle then it is more likely we
912 * will be able to convert all the readers. */
913 for (i
= pair_inst
->RGB
.DestIndex
; i
< RC_REGISTER_MAX_INDEX
; i
++) {
914 struct reg_value
** new_regvalp
= get_reg_valuep(
915 s
, RC_FILE_TEMPORARY
, i
, 3);
917 struct reg_value
** old_regvalp
=
920 pair_inst
->RGB
.DestIndex
,
921 rc_mask_to_swizzle(old_mask
));
923 *new_regvalp
= *old_regvalp
;
925 new_regvalp
= get_reg_valuep(s
, RC_FILE_TEMPORARY
, i
, 3);
933 /* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
934 * as the RGB opcode, then the Alpha instruction will already contain
935 * the correct opcode and instruction args, so we do not want to
938 if (pair_inst
->RGB
.Opcode
!= RC_OPCODE_REPL_ALPHA
) {
939 pair_inst
->Alpha
.Opcode
= pair_inst
->RGB
.Opcode
;
940 memcpy(pair_inst
->Alpha
.Arg
, pair_inst
->RGB
.Arg
,
941 sizeof(pair_inst
->Alpha
.Arg
));
943 pair_inst
->Alpha
.DestIndex
= new_index
;
944 pair_inst
->Alpha
.WriteMask
= RC_MASK_W
;
945 pair_inst
->Alpha
.Target
= pair_inst
->RGB
.Target
;
946 pair_inst
->Alpha
.OutputWriteMask
= pair_inst
->RGB
.OutputWriteMask
;
947 pair_inst
->Alpha
.DepthWriteMask
= pair_inst
->RGB
.DepthWriteMask
;
948 pair_inst
->Alpha
.Saturate
= pair_inst
->RGB
.Saturate
;
949 pair_inst
->Alpha
.Omod
= pair_inst
->RGB
.Omod
;
950 /* Move the swizzles into the first chan */
951 for (i
= 0; i
< info
->NumSrcRegs
; i
++) {
953 for (j
= 0; j
< 3; j
++) {
954 unsigned int swz
= get_swz(pair_inst
->Alpha
.Arg
[i
].Swizzle
, j
);
955 if (swz
!= RC_SWIZZLE_UNUSED
) {
956 pair_inst
->Alpha
.Arg
[i
].Swizzle
=
957 rc_init_swizzle(swz
, 1);
962 pair_inst
->RGB
.Opcode
= RC_OPCODE_NOP
;
963 pair_inst
->RGB
.DestIndex
= 0;
964 pair_inst
->RGB
.WriteMask
= 0;
965 pair_inst
->RGB
.Target
= 0;
966 pair_inst
->RGB
.OutputWriteMask
= 0;
967 pair_inst
->RGB
.DepthWriteMask
= 0;
968 pair_inst
->RGB
.Saturate
= 0;
969 memset(pair_inst
->RGB
.Arg
, 0, sizeof(pair_inst
->RGB
.Arg
));
971 for(i
= 0; i
< sched_inst
->GlobalReaders
.ReaderCount
; i
++) {
972 struct rc_reader reader
= sched_inst
->GlobalReaders
.Readers
[i
];
973 rgb_to_alpha_remap(reader
.Inst
, reader
.U
.P
.Arg
,
974 RC_FILE_TEMPORARY
, old_swz
, new_index
);
979 static void try_convert_and_pair(
980 struct schedule_state
*s
,
981 struct schedule_instruction
** inst_list
)
983 struct schedule_instruction
* list_ptr
= *inst_list
;
984 while (list_ptr
&& *inst_list
&& (*inst_list
)->NextReady
) {
986 if (list_ptr
->Instruction
->U
.P
.Alpha
.Opcode
!= RC_OPCODE_NOP
987 && list_ptr
->Instruction
->U
.P
.RGB
.Opcode
988 != RC_OPCODE_REPL_ALPHA
) {
991 if (list_ptr
->NumWriteValues
== 1
992 && convert_rgb_to_alpha(s
, list_ptr
)) {
994 struct schedule_instruction
* pair_ptr
;
995 remove_inst_from_list(inst_list
, list_ptr
);
996 add_inst_to_list_score(&s
->ReadyAlpha
, list_ptr
);
998 for (pair_ptr
= s
->ReadyRGB
; pair_ptr
;
999 pair_ptr
= pair_ptr
->NextReady
) {
1000 if (merge_instructions(&pair_ptr
->Instruction
->U
.P
,
1001 &list_ptr
->Instruction
->U
.P
)) {
1002 remove_inst_from_list(&s
->ReadyAlpha
, list_ptr
);
1003 remove_inst_from_list(&s
->ReadyRGB
, pair_ptr
);
1004 pair_ptr
->PairedInst
= list_ptr
;
1006 add_inst_to_list(&s
->ReadyFullALU
, pair_ptr
);
1007 list_ptr
= *inst_list
;
1016 list_ptr
= list_ptr
->NextReady
;
1022 * This function attempts to merge RGB and Alpha instructions together.
1024 static void pair_instructions(struct schedule_state
* s
)
1026 struct schedule_instruction
*rgb_ptr
;
1027 struct schedule_instruction
*alpha_ptr
;
1029 /* Some pairings might fail because they require too
1030 * many source slots; try all possible pairings if necessary */
1031 rgb_ptr
= s
->ReadyRGB
;
1033 struct schedule_instruction
* rgb_next
= rgb_ptr
->NextReady
;
1034 alpha_ptr
= s
->ReadyAlpha
;
1036 struct schedule_instruction
* alpha_next
= alpha_ptr
->NextReady
;
1037 if (merge_instructions(&rgb_ptr
->Instruction
->U
.P
, &alpha_ptr
->Instruction
->U
.P
)) {
1038 /* Remove RGB and Alpha from their ready lists.
1040 remove_inst_from_list(&s
->ReadyRGB
, rgb_ptr
);
1041 remove_inst_from_list(&s
->ReadyAlpha
, alpha_ptr
);
1042 rgb_ptr
->PairedInst
= alpha_ptr
;
1043 add_inst_to_list(&s
->ReadyFullALU
, rgb_ptr
);
1046 alpha_ptr
= alpha_next
;
1055 /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1056 * slot can be converted into Alpha instructions. */
1057 try_convert_and_pair(s
, &s
->ReadyFullALU
);
1059 /* Try to convert some of the RGB instructions to Alpha and
1060 * try to pair it with another RGB. */
1061 try_convert_and_pair(s
, &s
->ReadyRGB
);
1064 static void update_max_score(
1065 struct schedule_state
* s
,
1066 struct schedule_instruction
** list
,
1068 struct schedule_instruction
** max_inst_out
,
1069 struct schedule_instruction
*** list_out
)
1071 struct schedule_instruction
* list_ptr
;
1072 for (list_ptr
= *list
; list_ptr
; list_ptr
= list_ptr
->NextReady
) {
1074 s
->CalcScore(list_ptr
);
1075 score
= list_ptr
->Score
;
1076 if (!*max_inst_out
|| score
> *max_score
) {
1078 *max_inst_out
= list_ptr
;
1084 static void emit_instruction(
1085 struct schedule_state
* s
,
1086 struct rc_instruction
* before
)
1089 struct schedule_instruction
* max_inst
= NULL
;
1090 struct schedule_instruction
** max_list
= NULL
;
1091 unsigned tex_count
= 0;
1092 struct schedule_instruction
* tex_ptr
;
1094 pair_instructions(s
);
1096 fprintf(stderr
, "Full:\n");
1097 print_list(s
->ReadyFullALU
);
1098 fprintf(stderr
, "RGB:\n");
1099 print_list(s
->ReadyRGB
);
1100 fprintf(stderr
, "Alpha:\n");
1101 print_list(s
->ReadyAlpha
);
1102 fprintf(stderr
, "TEX:\n");
1103 print_list(s
->ReadyTEX
);
1106 for (tex_ptr
= s
->ReadyTEX
; tex_ptr
; tex_ptr
= tex_ptr
->NextReady
) {
1107 if (tex_ptr
->Instruction
->U
.I
.Opcode
== RC_OPCODE_KIL
) {
1108 emit_all_tex(s
, before
);
1113 update_max_score(s
, &s
->ReadyFullALU
, &max_score
, &max_inst
, &max_list
);
1114 update_max_score(s
, &s
->ReadyRGB
, &max_score
, &max_inst
, &max_list
);
1115 update_max_score(s
, &s
->ReadyAlpha
, &max_score
, &max_inst
, &max_list
);
1117 if (tex_count
>= s
->max_tex_group
|| max_score
== -1
1118 || (s
->TEXCount
> 0 && tex_count
== s
->TEXCount
)
1119 || (!s
->C
->is_r500
&& tex_count
> 0 && max_score
== -1)) {
1120 emit_all_tex(s
, before
);
1124 remove_inst_from_list(max_list
, max_inst
);
1125 rc_insert_instruction(before
->Prev
, max_inst
->Instruction
);
1126 commit_alu_instruction(s
, max_inst
);
1128 presub_nop(before
->Prev
);
1132 static void add_tex_reader(
1133 struct schedule_state
* s
,
1134 struct schedule_instruction
* writer
,
1135 struct schedule_instruction
* reader
)
1137 if (!writer
|| writer
->Instruction
->Type
!= RC_INSTRUCTION_NORMAL
) {
1138 /*Not a TEX instructions */
1141 reader
->TexReadCount
++;
1142 rc_list_add(&writer
->TexReaders
, rc_list(&s
->C
->Pool
, reader
));
1145 static void scan_read(void * data
, struct rc_instruction
* inst
,
1146 rc_register_file file
, unsigned int index
, unsigned int chan
)
1148 struct schedule_state
* s
= data
;
1149 struct reg_value
** v
= get_reg_valuep(s
, file
, index
, chan
);
1150 struct reg_value_reader
* reader
;
1155 if (*v
&& (*v
)->Writer
== s
->Current
) {
1156 /* The instruction reads and writes to a register component.
1157 * In this case, we only want to increment dependencies by one.
1159 * Because each instruction depends on the writers of its source
1160 * registers _and_ the most recent writer of its destination
1161 * register. In this case, the current instruction (s->Current)
1162 * has a dependency that both writes to one of its source
1163 * registers and was the most recent writer to its destination
1164 * register. We have already marked this dependency in
1165 * scan_write(), so we don't need to do it again.
1168 /* We need to make sure we are adding s->Current to the
1169 * previous writer's list of TexReaders, if the previous writer
1170 * was a TEX instruction.
1172 add_tex_reader(s
, s
->PrevWriter
[chan
], s
->Current
);
1177 DBG("%i: read %i[%i] chan %i\n", s
->Current
->Instruction
->IP
, file
, index
, chan
);
1179 reader
= memory_pool_malloc(&s
->C
->Pool
, sizeof(*reader
));
1180 reader
->Reader
= s
->Current
;
1182 /* In this situation, the instruction reads from a register
1183 * that hasn't been written to or read from in the current
1185 *v
= memory_pool_malloc(&s
->C
->Pool
, sizeof(struct reg_value
));
1186 memset(*v
, 0, sizeof(struct reg_value
));
1187 (*v
)->Readers
= reader
;
1189 reader
->Next
= (*v
)->Readers
;
1190 (*v
)->Readers
= reader
;
1191 /* Only update the current instruction's dependencies if the
1192 * register it reads from has been written to in this block. */
1194 add_tex_reader(s
, (*v
)->Writer
, s
->Current
);
1195 s
->Current
->NumDependencies
++;
1200 if (s
->Current
->NumReadValues
>= 12) {
1201 rc_error(s
->C
, "%s: NumReadValues overflow\n", __FUNCTION__
);
1203 s
->Current
->ReadValues
[s
->Current
->NumReadValues
++] = *v
;
1207 static void scan_write(void * data
, struct rc_instruction
* inst
,
1208 rc_register_file file
, unsigned int index
, unsigned int chan
)
1210 struct schedule_state
* s
= data
;
1211 struct reg_value
** pv
= get_reg_valuep(s
, file
, index
, chan
);
1212 struct reg_value
* newv
;
1217 DBG("%i: write %i[%i] chan %i\n", s
->Current
->Instruction
->IP
, file
, index
, chan
);
1219 newv
= memory_pool_malloc(&s
->C
->Pool
, sizeof(*newv
));
1220 memset(newv
, 0, sizeof(*newv
));
1222 newv
->Writer
= s
->Current
;
1226 s
->Current
->NumDependencies
++;
1227 /* Keep track of the previous writer to s->Current's destination
1229 s
->PrevWriter
[chan
] = (*pv
)->Writer
;
1234 if (s
->Current
->NumWriteValues
>= 4) {
1235 rc_error(s
->C
, "%s: NumWriteValues overflow\n", __FUNCTION__
);
1237 s
->Current
->WriteValues
[s
->Current
->NumWriteValues
++] = newv
;
1241 static void is_rgb_to_alpha_possible_normal(
1243 struct rc_instruction
* inst
,
1244 struct rc_src_register
* src
)
1246 struct rc_reader_data
* reader_data
= userdata
;
1247 reader_data
->Abort
= 1;
1251 static void schedule_block(struct schedule_state
* s
,
1252 struct rc_instruction
* begin
, struct rc_instruction
* end
)
1256 /* Scan instructions for data dependencies */
1258 for(struct rc_instruction
* inst
= begin
; inst
!= end
; inst
= inst
->Next
) {
1259 s
->Current
= memory_pool_malloc(&s
->C
->Pool
, sizeof(*s
->Current
));
1260 memset(s
->Current
, 0, sizeof(struct schedule_instruction
));
1262 if (inst
->Type
== RC_INSTRUCTION_NORMAL
) {
1263 const struct rc_opcode_info
* info
=
1264 rc_get_opcode_info(inst
->U
.I
.Opcode
);
1265 if (info
->HasTexture
) {
1270 /* XXX: This causes SemWait to be set for all instructions in
1271 * a block if the previous block contained a TEX instruction.
1272 * We can do better here, but it will take a lot of work. */
1273 if (s
->PrevBlockHasTex
) {
1274 s
->Current
->TexReadCount
= 1;
1277 s
->Current
->Instruction
= inst
;
1280 DBG("%i: Scanning\n", inst
->IP
);
1282 /* The order of things here is subtle and maybe slightly
1283 * counter-intuitive, to account for the case where an
1284 * instruction writes to the same register as it reads
1286 rc_for_all_writes_chan(inst
, &scan_write
, s
);
1287 rc_for_all_reads_chan(inst
, &scan_read
, s
);
1289 DBG("%i: Has %i dependencies\n", inst
->IP
, s
->Current
->NumDependencies
);
1291 if (!s
->Current
->NumDependencies
) {
1292 instruction_ready(s
, s
->Current
);
1295 /* Get global readers for possible RGB->Alpha conversion. */
1296 s
->Current
->GlobalReaders
.ExitOnAbort
= 1;
1297 rc_get_readers(s
->C
, inst
, &s
->Current
->GlobalReaders
,
1298 is_rgb_to_alpha_possible_normal
,
1299 is_rgb_to_alpha_possible
, NULL
);
1302 /* Temporarily unlink all instructions */
1303 begin
->Prev
->Next
= end
;
1304 end
->Prev
= begin
->Prev
;
1306 /* Schedule instructions back */
1307 while(!s
->C
->Error
&&
1308 (s
->ReadyTEX
|| s
->ReadyRGB
|| s
->ReadyAlpha
|| s
->ReadyFullALU
)) {
1309 emit_instruction(s
, end
);
1313 static int is_controlflow(struct rc_instruction
* inst
)
1315 if (inst
->Type
== RC_INSTRUCTION_NORMAL
) {
1316 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
1317 return opcode
->IsFlowControl
;
1322 void rc_pair_schedule(struct radeon_compiler
*cc
, void *user
)
1324 struct r300_fragment_program_compiler
*c
= (struct r300_fragment_program_compiler
*)cc
;
1325 struct schedule_state s
;
1326 struct rc_instruction
* inst
= c
->Base
.Program
.Instructions
.Next
;
1327 unsigned int * opt
= user
;
1329 memset(&s
, 0, sizeof(s
));
1333 s
.CalcScore
= calc_score_readers
;
1335 s
.CalcScore
= calc_score_r300
;
1337 s
.max_tex_group
= debug_get_num_option("RADEON_TEX_GROUP", 8);
1338 while(inst
!= &c
->Base
.Program
.Instructions
) {
1339 struct rc_instruction
* first
;
1341 if (is_controlflow(inst
)) {
1348 while(inst
!= &c
->Base
.Program
.Instructions
&& !is_controlflow(inst
))
1351 DBG("Schedule one block\n");
1352 memset(s
.Temporary
, 0, sizeof(s
.Temporary
));
1354 schedule_block(&s
, first
, inst
);
1356 s
.PrevBlockHasTex
= 1;