2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "radeon_program_pair.h"
33 #include "main/glheader.h"
34 #include "program/register_allocate.h"
37 #include "r300_fragprog_swizzle.h"
38 #include "radeon_compiler.h"
39 #include "radeon_compiler_util.h"
40 #include "radeon_dataflow.h"
41 #include "radeon_list.h"
42 #include "radeon_variable.h"
46 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
50 struct register_info
{
51 struct live_intervals Live
[4];
54 unsigned int Allocated
:1;
56 unsigned int Index
:RC_REGISTER_INDEX_BITS
;
57 unsigned int Writemask
;
60 struct regalloc_state
{
61 struct radeon_compiler
* C
;
63 struct register_info
* Input
;
64 unsigned int NumInputs
;
66 struct register_info
* Temporary
;
67 unsigned int NumTemporaries
;
78 RC_REG_CLASS_SINGLE_PLUS_ALPHA
,
79 RC_REG_CLASS_DOUBLE_PLUS_ALPHA
,
80 RC_REG_CLASS_TRIPLE_PLUS_ALPHA
,
97 enum rc_reg_class Class
;
99 unsigned int WritemaskCount
;
101 /** This is 1 if this class is being used by the register allocator
105 /** This is the ID number assigned to this class by ra. */
108 /** List of writemasks that belong to this class */
109 unsigned int Writemasks
[3];
114 static void print_live_intervals(struct live_intervals
* src
)
116 if (!src
|| !src
->Used
) {
121 DBG("(%i,%i)", src
->Start
, src
->End
);
124 static int overlap_live_intervals(struct live_intervals
* a
, struct live_intervals
* b
)
127 DBG("overlap_live_intervals: ");
128 print_live_intervals(a
);
130 print_live_intervals(b
);
134 if (!a
->Used
|| !b
->Used
) {
135 DBG(" unused interval\n");
139 if (a
->Start
> b
->Start
) {
140 if (a
->Start
< b
->End
) {
144 } else if (b
->Start
> a
->Start
) {
145 if (b
->Start
< a
->End
) {
149 } else { /* a->Start == b->Start */
150 if (a
->Start
!= a
->End
&& b
->Start
!= b
->End
) {
156 DBG(" no overlap\n");
161 static void scan_read_callback(void * data
, struct rc_instruction
* inst
,
162 rc_register_file file
, unsigned int index
, unsigned int mask
)
164 struct regalloc_state
* s
= data
;
165 struct register_info
* reg
;
168 if (file
!= RC_FILE_INPUT
)
171 s
->Input
[index
].Used
= 1;
172 reg
= &s
->Input
[index
];
174 for (i
= 0; i
< 4; i
++) {
175 if (!((mask
>> i
) & 0x1)) {
178 reg
->Live
[i
].Used
= 1;
179 reg
->Live
[i
].Start
= 0;
181 s
->LoopEnd
> inst
->IP
? s
->LoopEnd
: inst
->IP
;
185 static void remap_register(void * data
, struct rc_instruction
* inst
,
186 rc_register_file
* file
, unsigned int * index
)
188 struct regalloc_state
* s
= data
;
189 const struct register_info
* reg
;
191 if (*file
== RC_FILE_TEMPORARY
&& s
->Simple
)
192 reg
= &s
->Temporary
[*index
];
193 else if (*file
== RC_FILE_INPUT
)
194 reg
= &s
->Input
[*index
];
198 if (reg
->Allocated
) {
203 static void alloc_input_simple(void * data
, unsigned int input
,
206 struct regalloc_state
* s
= data
;
208 if (input
>= s
->NumInputs
)
211 s
->Input
[input
].Allocated
= 1;
212 s
->Input
[input
].File
= RC_FILE_TEMPORARY
;
213 s
->Input
[input
].Index
= hwreg
;
216 /* This functions offsets the temporary register indices by the number
217 * of input registers, because input registers are actually temporaries and
218 * should not occupy the same space.
220 * This pass is supposed to be used to maintain correct allocation of inputs
221 * if the standard register allocation is disabled. */
222 static void do_regalloc_inputs_only(struct regalloc_state
* s
)
224 for (unsigned i
= 0; i
< s
->NumTemporaries
; i
++) {
225 s
->Temporary
[i
].Allocated
= 1;
226 s
->Temporary
[i
].File
= RC_FILE_TEMPORARY
;
227 s
->Temporary
[i
].Index
= i
+ s
->NumInputs
;
231 static unsigned int is_derivative(rc_opcode op
)
233 return (op
== RC_OPCODE_DDX
|| op
== RC_OPCODE_DDY
);
236 static int find_class(
237 struct rc_class
* classes
,
238 unsigned int writemask
,
239 unsigned int max_writemask_count
)
242 for (i
= 0; i
< RC_REG_CLASS_COUNT
; i
++) {
244 if (classes
[i
].WritemaskCount
> max_writemask_count
) {
247 for (j
= 0; j
< 3; j
++) {
248 if (classes
[i
].Writemasks
[j
] == writemask
) {
256 struct variable_get_class_cb_data
{
257 unsigned int * can_change_writemask
;
258 unsigned int conversion_swizzle
;
261 static void variable_get_class_read_cb(
263 struct rc_instruction
* inst
,
264 struct rc_pair_instruction_arg
* arg
,
265 struct rc_pair_instruction_source
* src
)
267 struct variable_get_class_cb_data
* d
= userdata
;
268 unsigned int new_swizzle
= rc_adjust_channels(arg
->Swizzle
,
269 d
->conversion_swizzle
);
270 if (!r300_swizzle_is_native_basic(new_swizzle
)) {
271 *d
->can_change_writemask
= 0;
275 static enum rc_reg_class
variable_get_class(
276 struct rc_variable
* variable
,
277 struct rc_class
* classes
)
280 unsigned int can_change_writemask
= 1;
281 unsigned int writemask
= rc_variable_writemask_sum(variable
);
282 struct rc_list
* readers
= rc_variable_readers_union(variable
);
285 if (!variable
->C
->is_r500
) {
287 struct rc_variable
* var_ptr
;
288 /* The assumption here is that if an instruction has type
289 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
290 * r300 and r400 can't swizzle the result of a TEX lookup. */
291 for (var_ptr
= variable
; var_ptr
; var_ptr
= var_ptr
->Friend
) {
292 if (var_ptr
->Inst
->Type
== RC_INSTRUCTION_NORMAL
) {
293 writemask
= RC_MASK_XYZW
;
297 /* Check if it is possible to do swizzle packing for r300/r400
298 * without creating non-native swizzles. */
299 class_index
= find_class(classes
, writemask
, 3);
300 if (class_index
< 0) {
303 c
= classes
[class_index
];
304 if (c
.WritemaskCount
== 1) {
307 for (i
= 0; i
< c
.WritemaskCount
; i
++) {
308 struct rc_variable
* var_ptr
;
309 for (var_ptr
= variable
; var_ptr
;
310 var_ptr
= var_ptr
->Friend
) {
312 unsigned int conversion_swizzle
=
313 rc_make_conversion_swizzle(
314 writemask
, c
.Writemasks
[i
]);
315 struct variable_get_class_cb_data d
;
316 d
.can_change_writemask
= &can_change_writemask
;
317 d
.conversion_swizzle
= conversion_swizzle
;
318 /* If we get this far var_ptr->Inst has to
319 * be a pair instruction. If variable or any
320 * of its friends are normal instructions,
321 * then the writemask will be set to RC_MASK_XYZW
322 * and the function will return before it gets
324 rc_pair_for_all_reads_arg(var_ptr
->Inst
,
325 variable_get_class_read_cb
, &d
);
327 for (j
= 0; j
< var_ptr
->ReaderCount
; j
++) {
328 unsigned int old_swizzle
;
329 unsigned int new_swizzle
;
330 struct rc_reader r
= var_ptr
->Readers
[j
];
332 RC_INSTRUCTION_PAIR
) {
333 old_swizzle
= r
.U
.P
.Arg
->Swizzle
;
335 old_swizzle
= r
.U
.I
.Src
->Swizzle
;
337 new_swizzle
= rc_adjust_channels(
338 old_swizzle
, conversion_swizzle
);
339 if (!r300_swizzle_is_native_basic(
341 can_change_writemask
= 0;
345 if (!can_change_writemask
) {
349 if (!can_change_writemask
) {
355 if (variable
->Inst
->Type
== RC_INSTRUCTION_PAIR
) {
356 /* DDX/DDY seem to always fail when their writemasks are
358 if (is_derivative(variable
->Inst
->U
.P
.RGB
.Opcode
)
359 || is_derivative(variable
->Inst
->U
.P
.Alpha
.Opcode
)) {
360 can_change_writemask
= 0;
363 for ( ; readers
; readers
= readers
->Next
) {
364 struct rc_reader
* r
= readers
->Item
;
365 if (r
->Inst
->Type
== RC_INSTRUCTION_PAIR
) {
366 if (r
->U
.P
.Arg
->Source
== RC_PAIR_PRESUB_SRC
) {
367 can_change_writemask
= 0;
370 /* DDX/DDY also fail when their swizzles are changed. */
371 if (is_derivative(r
->Inst
->U
.P
.RGB
.Opcode
)
372 || is_derivative(r
->Inst
->U
.P
.Alpha
.Opcode
)) {
373 can_change_writemask
= 0;
379 class_index
= find_class(classes
, writemask
,
380 can_change_writemask
? 3 : 1);
382 if (class_index
> -1) {
383 return classes
[class_index
].Class
;
386 rc_error(variable
->C
,
387 "Could not find class for index=%u mask=%u\n",
388 variable
->Dst
.Index
, writemask
);
393 static unsigned int overlap_live_intervals_array(
394 struct live_intervals
* a
,
395 struct live_intervals
* b
)
397 unsigned int a_chan
, b_chan
;
398 for (a_chan
= 0; a_chan
< 4; a_chan
++) {
399 for (b_chan
= 0; b_chan
< 4; b_chan
++) {
400 if (overlap_live_intervals(&a
[a_chan
], &b
[b_chan
])) {
408 static unsigned int reg_get_index(int reg
)
410 return reg
/ RC_MASK_XYZW
;
413 static unsigned int reg_get_writemask(int reg
)
415 return (reg
% RC_MASK_XYZW
) + 1;
418 static int get_reg_id(unsigned int index
, unsigned int writemask
)
421 if (writemask
== 0) {
424 return (index
* RC_MASK_XYZW
) + (writemask
- 1);
428 static void print_reg(int reg
)
430 unsigned int index
= reg_get_index(reg
);
431 unsigned int mask
= reg_get_writemask(reg
);
432 fprintf(stderr
, "Temp[%u].%c%c%c%c", index
,
433 mask
& RC_MASK_X
? 'x' : '_',
434 mask
& RC_MASK_Y
? 'y' : '_',
435 mask
& RC_MASK_Z
? 'z' : '_',
436 mask
& RC_MASK_W
? 'w' : '_');
440 static void add_register_conflicts(
441 struct ra_regs
* regs
,
442 unsigned int max_temp_regs
)
444 unsigned int index
, a_mask
, b_mask
;
445 for (index
= 0; index
< max_temp_regs
; index
++) {
446 for(a_mask
= 1; a_mask
<= RC_MASK_XYZW
; a_mask
++) {
447 for (b_mask
= a_mask
+ 1; b_mask
<= RC_MASK_XYZW
;
449 if (a_mask
& b_mask
) {
450 ra_add_reg_conflict(regs
,
451 get_reg_id(index
, a_mask
),
452 get_reg_id(index
, b_mask
));
459 static void do_advanced_regalloc(struct regalloc_state
* s
)
461 struct rc_class rc_class_list
[] = {
462 {RC_REG_CLASS_SINGLE
, 3, 0, 0,
466 {RC_REG_CLASS_DOUBLE
, 3, 0, 0,
467 {RC_MASK_X
| RC_MASK_Y
,
468 RC_MASK_X
| RC_MASK_Z
,
469 RC_MASK_Y
| RC_MASK_Z
}},
470 {RC_REG_CLASS_TRIPLE
, 1, 0, 0,
471 {RC_MASK_X
| RC_MASK_Y
| RC_MASK_Z
,
474 {RC_REG_CLASS_ALPHA
, 1, 0, 0,
478 {RC_REG_CLASS_SINGLE_PLUS_ALPHA
, 3, 0, 0,
479 {RC_MASK_X
| RC_MASK_W
,
480 RC_MASK_Y
| RC_MASK_W
,
481 RC_MASK_Z
| RC_MASK_W
}},
482 {RC_REG_CLASS_DOUBLE_PLUS_ALPHA
, 3, 0, 0,
483 {RC_MASK_X
| RC_MASK_Y
| RC_MASK_W
,
484 RC_MASK_X
| RC_MASK_Z
| RC_MASK_W
,
485 RC_MASK_Y
| RC_MASK_Z
| RC_MASK_W
}},
486 {RC_REG_CLASS_TRIPLE_PLUS_ALPHA
, 1, 0, 0,
487 {RC_MASK_X
| RC_MASK_Y
| RC_MASK_Z
| RC_MASK_W
,
490 {RC_REG_CLASS_X
, 1, 0, 0,
494 {RC_REG_CLASS_Y
, 1, 0, 0,
498 {RC_REG_CLASS_Z
, 1, 0, 0,
502 {RC_REG_CLASS_XY
, 1, 0, 0,
503 {RC_MASK_X
| RC_MASK_Y
,
506 {RC_REG_CLASS_YZ
, 1, 0, 0,
507 {RC_MASK_Y
| RC_MASK_Z
,
510 {RC_REG_CLASS_XZ
, 1, 0, 0,
511 {RC_MASK_X
| RC_MASK_Z
,
514 {RC_REG_CLASS_XW
, 1, 0, 0,
515 {RC_MASK_X
| RC_MASK_W
,
518 {RC_REG_CLASS_YW
, 1, 0, 0,
519 {RC_MASK_Y
| RC_MASK_W
,
522 {RC_REG_CLASS_ZW
, 1, 0, 0,
523 {RC_MASK_Z
| RC_MASK_W
,
526 {RC_REG_CLASS_XYW
, 1, 0, 0,
527 {RC_MASK_X
| RC_MASK_Y
| RC_MASK_W
,
530 {RC_REG_CLASS_YZW
, 1, 0, 0,
531 {RC_MASK_Y
| RC_MASK_Z
| RC_MASK_W
,
534 {RC_REG_CLASS_XZW
, 1, 0, 0,
535 {RC_MASK_X
| RC_MASK_Z
| RC_MASK_W
,
540 unsigned int i
, j
, index
, input_node
, node_count
, node_index
;
541 unsigned int * node_classes
;
542 struct rc_instruction
* inst
;
543 struct rc_list
* var_ptr
;
544 struct rc_list
* variables
;
545 struct ra_regs
* regs
;
546 struct ra_graph
* graph
;
548 /* Allocate the main ra data structure */
549 regs
= ra_alloc_reg_set(NULL
, s
->C
->max_temp_regs
* RC_MASK_XYZW
);
551 /* Get list of program variables */
552 variables
= rc_get_variables(s
->C
);
553 node_count
= rc_list_count(variables
);
554 node_classes
= memory_pool_malloc(&s
->C
->Pool
,
555 node_count
* sizeof(unsigned int));
557 for (var_ptr
= variables
, node_index
= 0; var_ptr
;
558 var_ptr
= var_ptr
->Next
, node_index
++) {
559 unsigned int class_index
;
560 /* Compute the live intervals */
561 rc_variable_compute_live_intervals(var_ptr
->Item
);
563 class_index
= variable_get_class(var_ptr
->Item
, rc_class_list
);
565 /* If we haven't used this register class yet, mark it
566 * as used and allocate space for it. */
567 if (!rc_class_list
[class_index
].Used
) {
568 rc_class_list
[class_index
].Used
= 1;
569 rc_class_list
[class_index
].Id
= ra_alloc_reg_class(regs
);
572 node_classes
[node_index
] = rc_class_list
[class_index
].Id
;
576 /* Assign registers to the classes */
577 for (i
= 0; i
< RC_REG_CLASS_COUNT
; i
++) {
578 struct rc_class
class = rc_class_list
[i
];
583 for (index
= 0; index
< s
->C
->max_temp_regs
; index
++) {
584 for (j
= 0; j
< class.WritemaskCount
; j
++) {
585 int reg_id
= get_reg_id(index
,
586 class.Writemasks
[j
]);
587 ra_class_add_reg(regs
, class.Id
, reg_id
);
592 /* Add register conflicts */
593 add_register_conflicts(regs
, s
->C
->max_temp_regs
);
595 /* Calculate live intervals for input registers */
596 for (inst
= s
->C
->Program
.Instructions
.Next
;
597 inst
!= &s
->C
->Program
.Instructions
;
599 rc_opcode op
= rc_get_flow_control_inst(inst
);
600 if (op
== RC_OPCODE_BGNLOOP
) {
601 struct rc_instruction
* endloop
=
602 rc_match_bgnloop(inst
);
603 if (endloop
->IP
> s
->LoopEnd
) {
604 s
->LoopEnd
= endloop
->IP
;
607 rc_for_all_reads_mask(inst
, scan_read_callback
, s
);
610 /* Compute the writemask for inputs. */
611 for (i
= 0; i
< s
->NumInputs
; i
++) {
612 unsigned int chan
, class_id
, writemask
= 0;
613 for (chan
= 0; chan
< 4; chan
++) {
614 if (s
->Input
[i
].Live
[chan
].Used
) {
615 writemask
|= (1 << chan
);
618 s
->Input
[i
].Writemask
= writemask
;
621 ra_set_finalize(regs
, NULL
);
623 graph
= ra_alloc_interference_graph(regs
, node_count
+ s
->NumInputs
);
625 /* Build the interference graph */
626 for (var_ptr
= variables
, node_index
= 0; var_ptr
;
627 var_ptr
= var_ptr
->Next
,node_index
++) {
628 struct rc_list
* a
, * b
;
629 unsigned int b_index
;
631 ra_set_node_class(graph
, node_index
, node_classes
[node_index
]);
633 for (a
= var_ptr
, b
= var_ptr
->Next
, b_index
= node_index
+ 1;
634 b
; b
= b
->Next
, b_index
++) {
635 struct rc_variable
* var_a
= a
->Item
;
637 struct rc_variable
* var_b
= b
->Item
;
639 if (overlap_live_intervals_array(var_a
->Live
, var_b
->Live
)) {
640 ra_add_node_interference(graph
,
641 node_index
, b_index
);
643 var_b
= var_b
->Friend
;
645 var_a
= var_a
->Friend
;
650 /* Add input registers to the interference graph */
651 for (i
= 0, input_node
= 0; i
< s
->NumInputs
; i
++) {
652 if (!s
->Input
[i
].Writemask
) {
655 for (var_ptr
= variables
, node_index
= 0;
656 var_ptr
; var_ptr
= var_ptr
->Next
, node_index
++) {
657 struct rc_variable
* var
= var_ptr
->Item
;
658 if (overlap_live_intervals_array(s
->Input
[i
].Live
,
660 ra_add_node_interference(graph
, node_index
,
661 node_count
+ input_node
);
664 /* Manually allocate a register for this input */
665 ra_set_node_reg(graph
, node_count
+ input_node
, get_reg_id(
666 s
->Input
[i
].Index
, s
->Input
[i
].Writemask
));
670 if (!ra_allocate_no_spills(graph
)) {
671 rc_error(s
->C
, "Ran out of hardware temporaries\n");
675 /* Rewrite the registers */
676 for (var_ptr
= variables
, node_index
= 0; var_ptr
;
677 var_ptr
= var_ptr
->Next
, node_index
++) {
678 int reg
= ra_get_node_reg(graph
, node_index
);
679 unsigned int writemask
= reg_get_writemask(reg
);
680 unsigned int index
= reg_get_index(reg
);
681 struct rc_variable
* var
= var_ptr
->Item
;
683 if (!s
->C
->is_r500
&& var
->Inst
->Type
== RC_INSTRUCTION_NORMAL
) {
684 writemask
= rc_variable_writemask_sum(var
);
687 if (var
->Dst
.File
== RC_FILE_INPUT
) {
690 rc_variable_change_dst(var
, index
, writemask
);
698 * @param user This parameter should be a pointer to an integer value. If this
699 * integer value is zero, then a simple register allocator will be used that
700 * only allocates space for input registers (\sa do_regalloc_inputs_only). If
701 * user is non-zero, then the regular register allocator will be used
704 void rc_pair_regalloc(struct radeon_compiler
*cc
, void *user
)
706 struct r300_fragment_program_compiler
*c
=
707 (struct r300_fragment_program_compiler
*)cc
;
708 struct regalloc_state s
;
709 int * do_full_regalloc
= (int*)user
;
711 memset(&s
, 0, sizeof(s
));
713 s
.NumInputs
= rc_get_max_index(cc
, RC_FILE_INPUT
) + 1;
714 s
.Input
= memory_pool_malloc(&cc
->Pool
,
715 s
.NumInputs
* sizeof(struct register_info
));
716 memset(s
.Input
, 0, s
.NumInputs
* sizeof(struct register_info
));
718 s
.NumTemporaries
= rc_get_max_index(cc
, RC_FILE_TEMPORARY
) + 1;
719 s
.Temporary
= memory_pool_malloc(&cc
->Pool
,
720 s
.NumTemporaries
* sizeof(struct register_info
));
721 memset(s
.Temporary
, 0, s
.NumTemporaries
* sizeof(struct register_info
));
723 rc_recompute_ips(s
.C
);
725 c
->AllocateHwInputs(c
, &alloc_input_simple
, &s
);
726 if (*do_full_regalloc
) {
727 do_advanced_regalloc(&s
);
730 do_regalloc_inputs_only(&s
);
733 /* Rewrite inputs and if we are doing the simple allocation, rewrite
734 * temporaries too. */
735 for (struct rc_instruction
*inst
= s
.C
->Program
.Instructions
.Next
;
736 inst
!= &s
.C
->Program
.Instructions
;
738 rc_remap_registers(inst
, &remap_register
, &s
);