2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "radeon_program_pair.h"
33 #include "main/glheader.h"
34 #include "util/register_allocate.h"
35 #include "util/u_memory.h"
36 #include "util/ralloc.h"
38 #include "r300_fragprog_swizzle.h"
39 #include "radeon_compiler.h"
40 #include "radeon_compiler_util.h"
41 #include "radeon_dataflow.h"
42 #include "radeon_list.h"
43 #include "radeon_regalloc.h"
44 #include "radeon_variable.h"
48 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
52 struct register_info
{
53 struct live_intervals Live
[4];
56 unsigned int Allocated
:1;
58 unsigned int Index
:RC_REGISTER_INDEX_BITS
;
59 unsigned int Writemask
;
/**
 * Working state shared by the callbacks of one register allocation run.
 */
struct regalloc_state {
	/** Compiler whose program is being processed. */
	struct radeon_compiler * C;

	/** Per-input records; NumInputs entries. */
	struct register_info * Input;
	unsigned int NumInputs;

	/** Per-temporary records; NumTemporaries entries. */
	struct register_info * Temporary;
	unsigned int NumTemporaries;

	/** Non-zero when temporaries are remapped by remap_register(). */
	unsigned int Simple;

	/** Largest ENDLOOP instruction IP seen so far while scanning. */
	int LoopEnd;
};
78 unsigned int WritemaskCount
;
80 /** List of writemasks that belong to this class */
81 unsigned int Writemasks
[3];
86 static const struct rc_class rc_class_list
[] = {
87 {RC_REG_CLASS_SINGLE
, 3,
91 {RC_REG_CLASS_DOUBLE
, 3,
92 {RC_MASK_X
| RC_MASK_Y
,
93 RC_MASK_X
| RC_MASK_Z
,
94 RC_MASK_Y
| RC_MASK_Z
}},
95 {RC_REG_CLASS_TRIPLE
, 1,
96 {RC_MASK_X
| RC_MASK_Y
| RC_MASK_Z
,
99 {RC_REG_CLASS_ALPHA
, 1,
103 {RC_REG_CLASS_SINGLE_PLUS_ALPHA
, 3,
104 {RC_MASK_X
| RC_MASK_W
,
105 RC_MASK_Y
| RC_MASK_W
,
106 RC_MASK_Z
| RC_MASK_W
}},
107 {RC_REG_CLASS_DOUBLE_PLUS_ALPHA
, 3,
108 {RC_MASK_X
| RC_MASK_Y
| RC_MASK_W
,
109 RC_MASK_X
| RC_MASK_Z
| RC_MASK_W
,
110 RC_MASK_Y
| RC_MASK_Z
| RC_MASK_W
}},
111 {RC_REG_CLASS_TRIPLE_PLUS_ALPHA
, 1,
112 {RC_MASK_X
| RC_MASK_Y
| RC_MASK_Z
| RC_MASK_W
,
128 {RC_MASK_X
| RC_MASK_Y
,
132 {RC_MASK_Y
| RC_MASK_Z
,
136 {RC_MASK_X
| RC_MASK_Z
,
140 {RC_MASK_X
| RC_MASK_W
,
144 {RC_MASK_Y
| RC_MASK_W
,
148 {RC_MASK_Z
| RC_MASK_W
,
151 {RC_REG_CLASS_XYW
, 1,
152 {RC_MASK_X
| RC_MASK_Y
| RC_MASK_W
,
155 {RC_REG_CLASS_YZW
, 1,
156 {RC_MASK_Y
| RC_MASK_Z
| RC_MASK_W
,
159 {RC_REG_CLASS_XZW
, 1,
160 {RC_MASK_X
| RC_MASK_Z
| RC_MASK_W
,
165 static void print_live_intervals(struct live_intervals
* src
)
167 if (!src
|| !src
->Used
) {
172 DBG("(%i,%i)", src
->Start
, src
->End
);
175 static int overlap_live_intervals(struct live_intervals
* a
, struct live_intervals
* b
)
178 DBG("overlap_live_intervals: ");
179 print_live_intervals(a
);
181 print_live_intervals(b
);
185 if (!a
->Used
|| !b
->Used
) {
186 DBG(" unused interval\n");
190 if (a
->Start
> b
->Start
) {
191 if (a
->Start
< b
->End
) {
195 } else if (b
->Start
> a
->Start
) {
196 if (b
->Start
< a
->End
) {
200 } else { /* a->Start == b->Start */
201 if (a
->Start
!= a
->End
&& b
->Start
!= b
->End
) {
207 DBG(" no overlap\n");
212 static void scan_read_callback(void * data
, struct rc_instruction
* inst
,
213 rc_register_file file
, unsigned int index
, unsigned int mask
)
215 struct regalloc_state
* s
= data
;
216 struct register_info
* reg
;
219 if (file
!= RC_FILE_INPUT
)
222 s
->Input
[index
].Used
= 1;
223 reg
= &s
->Input
[index
];
225 for (i
= 0; i
< 4; i
++) {
226 if (!((mask
>> i
) & 0x1)) {
229 reg
->Live
[i
].Used
= 1;
230 reg
->Live
[i
].Start
= 0;
232 s
->LoopEnd
> inst
->IP
? s
->LoopEnd
: inst
->IP
;
236 static void remap_register(void * data
, struct rc_instruction
* inst
,
237 rc_register_file
* file
, unsigned int * index
)
239 struct regalloc_state
* s
= data
;
240 const struct register_info
* reg
;
242 if (*file
== RC_FILE_TEMPORARY
&& s
->Simple
)
243 reg
= &s
->Temporary
[*index
];
244 else if (*file
== RC_FILE_INPUT
)
245 reg
= &s
->Input
[*index
];
249 if (reg
->Allocated
) {
254 static void alloc_input_simple(void * data
, unsigned int input
,
257 struct regalloc_state
* s
= data
;
259 if (input
>= s
->NumInputs
)
262 s
->Input
[input
].Allocated
= 1;
263 s
->Input
[input
].File
= RC_FILE_TEMPORARY
;
264 s
->Input
[input
].Index
= hwreg
;
267 /* This functions offsets the temporary register indices by the number
268 * of input registers, because input registers are actually temporaries and
269 * should not occupy the same space.
271 * This pass is supposed to be used to maintain correct allocation of inputs
272 * if the standard register allocation is disabled. */
273 static void do_regalloc_inputs_only(struct regalloc_state
* s
)
275 for (unsigned i
= 0; i
< s
->NumTemporaries
; i
++) {
276 s
->Temporary
[i
].Allocated
= 1;
277 s
->Temporary
[i
].File
= RC_FILE_TEMPORARY
;
278 s
->Temporary
[i
].Index
= i
+ s
->NumInputs
;
282 static unsigned int is_derivative(rc_opcode op
)
284 return (op
== RC_OPCODE_DDX
|| op
== RC_OPCODE_DDY
);
287 static int find_class(
288 const struct rc_class
* classes
,
289 unsigned int writemask
,
290 unsigned int max_writemask_count
)
293 for (i
= 0; i
< RC_REG_CLASS_COUNT
; i
++) {
295 if (classes
[i
].WritemaskCount
> max_writemask_count
) {
298 for (j
= 0; j
< 3; j
++) {
299 if (classes
[i
].Writemasks
[j
] == writemask
) {
/** User data passed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
	/** Flag cleared by the callback when a swizzle would become non-native. */
	unsigned int * can_change_writemask;

	/** Channel conversion applied to each argument swizzle before testing. */
	unsigned int conversion_swizzle;
};
312 static void variable_get_class_read_cb(
314 struct rc_instruction
* inst
,
315 struct rc_pair_instruction_arg
* arg
,
316 struct rc_pair_instruction_source
* src
)
318 struct variable_get_class_cb_data
* d
= userdata
;
319 unsigned int new_swizzle
= rc_adjust_channels(arg
->Swizzle
,
320 d
->conversion_swizzle
);
321 if (!r300_swizzle_is_native_basic(new_swizzle
)) {
322 *d
->can_change_writemask
= 0;
326 static enum rc_reg_class
variable_get_class(
327 struct rc_variable
* variable
,
328 const struct rc_class
* classes
)
331 unsigned int can_change_writemask
= 1;
332 unsigned int writemask
= rc_variable_writemask_sum(variable
);
333 struct rc_list
* readers
= rc_variable_readers_union(variable
);
336 if (!variable
->C
->is_r500
) {
338 struct rc_variable
* var_ptr
;
339 /* The assumption here is that if an instruction has type
340 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
341 * r300 and r400 can't swizzle the result of a TEX lookup. */
342 for (var_ptr
= variable
; var_ptr
; var_ptr
= var_ptr
->Friend
) {
343 if (var_ptr
->Inst
->Type
== RC_INSTRUCTION_NORMAL
) {
344 writemask
= RC_MASK_XYZW
;
348 /* Check if it is possible to do swizzle packing for r300/r400
349 * without creating non-native swizzles. */
350 class_index
= find_class(classes
, writemask
, 3);
351 if (class_index
< 0) {
354 c
= classes
[class_index
];
355 if (c
.WritemaskCount
== 1) {
358 for (i
= 0; i
< c
.WritemaskCount
; i
++) {
359 struct rc_variable
* var_ptr
;
360 for (var_ptr
= variable
; var_ptr
;
361 var_ptr
= var_ptr
->Friend
) {
363 unsigned int conversion_swizzle
=
364 rc_make_conversion_swizzle(
365 writemask
, c
.Writemasks
[i
]);
366 struct variable_get_class_cb_data d
;
367 d
.can_change_writemask
= &can_change_writemask
;
368 d
.conversion_swizzle
= conversion_swizzle
;
369 /* If we get this far var_ptr->Inst has to
370 * be a pair instruction. If variable or any
371 * of its friends are normal instructions,
372 * then the writemask will be set to RC_MASK_XYZW
373 * and the function will return before it gets
375 rc_pair_for_all_reads_arg(var_ptr
->Inst
,
376 variable_get_class_read_cb
, &d
);
378 for (j
= 0; j
< var_ptr
->ReaderCount
; j
++) {
379 unsigned int old_swizzle
;
380 unsigned int new_swizzle
;
381 struct rc_reader r
= var_ptr
->Readers
[j
];
383 RC_INSTRUCTION_PAIR
) {
384 old_swizzle
= r
.U
.P
.Arg
->Swizzle
;
386 /* Source operands of TEX
387 * instructions can't be
388 * swizzle on r300/r400 GPUs.
390 can_change_writemask
= 0;
393 new_swizzle
= rc_adjust_channels(
394 old_swizzle
, conversion_swizzle
);
395 if (!r300_swizzle_is_native_basic(
397 can_change_writemask
= 0;
401 if (!can_change_writemask
) {
405 if (!can_change_writemask
) {
411 if (variable
->Inst
->Type
== RC_INSTRUCTION_PAIR
) {
412 /* DDX/DDY seem to always fail when their writemasks are
414 if (is_derivative(variable
->Inst
->U
.P
.RGB
.Opcode
)
415 || is_derivative(variable
->Inst
->U
.P
.Alpha
.Opcode
)) {
416 can_change_writemask
= 0;
419 for ( ; readers
; readers
= readers
->Next
) {
420 struct rc_reader
* r
= readers
->Item
;
421 if (r
->Inst
->Type
== RC_INSTRUCTION_PAIR
) {
422 if (r
->U
.P
.Arg
->Source
== RC_PAIR_PRESUB_SRC
) {
423 can_change_writemask
= 0;
426 /* DDX/DDY also fail when their swizzles are changed. */
427 if (is_derivative(r
->Inst
->U
.P
.RGB
.Opcode
)
428 || is_derivative(r
->Inst
->U
.P
.Alpha
.Opcode
)) {
429 can_change_writemask
= 0;
435 class_index
= find_class(classes
, writemask
,
436 can_change_writemask
? 3 : 1);
438 if (class_index
> -1) {
439 return classes
[class_index
].ID
;
442 rc_error(variable
->C
,
443 "Could not find class for index=%u mask=%u\n",
444 variable
->Dst
.Index
, writemask
);
449 static unsigned int overlap_live_intervals_array(
450 struct live_intervals
* a
,
451 struct live_intervals
* b
)
453 unsigned int a_chan
, b_chan
;
454 for (a_chan
= 0; a_chan
< 4; a_chan
++) {
455 for (b_chan
= 0; b_chan
< 4; b_chan
++) {
456 if (overlap_live_intervals(&a
[a_chan
], &b
[b_chan
])) {
464 static unsigned int reg_get_index(int reg
)
466 return reg
/ RC_MASK_XYZW
;
469 static unsigned int reg_get_writemask(int reg
)
471 return (reg
% RC_MASK_XYZW
) + 1;
474 static int get_reg_id(unsigned int index
, unsigned int writemask
)
477 if (writemask
== 0) {
480 return (index
* RC_MASK_XYZW
) + (writemask
- 1);
484 static void print_reg(int reg
)
486 unsigned int index
= reg_get_index(reg
);
487 unsigned int mask
= reg_get_writemask(reg
);
488 fprintf(stderr
, "Temp[%u].%c%c%c%c", index
,
489 mask
& RC_MASK_X
? 'x' : '_',
490 mask
& RC_MASK_Y
? 'y' : '_',
491 mask
& RC_MASK_Z
? 'z' : '_',
492 mask
& RC_MASK_W
? 'w' : '_');
496 static void add_register_conflicts(
497 struct ra_regs
* regs
,
498 unsigned int max_temp_regs
)
500 unsigned int index
, a_mask
, b_mask
;
501 for (index
= 0; index
< max_temp_regs
; index
++) {
502 for(a_mask
= 1; a_mask
<= RC_MASK_XYZW
; a_mask
++) {
503 for (b_mask
= a_mask
+ 1; b_mask
<= RC_MASK_XYZW
;
505 if (a_mask
& b_mask
) {
506 ra_add_reg_conflict(regs
,
507 get_reg_id(index
, a_mask
),
508 get_reg_id(index
, b_mask
));
515 static void do_advanced_regalloc(struct regalloc_state
* s
)
518 unsigned int i
, input_node
, node_count
, node_index
;
519 unsigned int * node_classes
;
520 struct rc_instruction
* inst
;
521 struct rc_list
* var_ptr
;
522 struct rc_list
* variables
;
523 struct ra_graph
* graph
;
524 const struct rc_regalloc_state
*ra_state
= s
->C
->regalloc_state
;
526 /* Get list of program variables */
527 variables
= rc_get_variables(s
->C
);
528 node_count
= rc_list_count(variables
);
529 node_classes
= memory_pool_malloc(&s
->C
->Pool
,
530 node_count
* sizeof(unsigned int));
532 for (var_ptr
= variables
, node_index
= 0; var_ptr
;
533 var_ptr
= var_ptr
->Next
, node_index
++) {
534 unsigned int class_index
;
535 /* Compute the live intervals */
536 rc_variable_compute_live_intervals(var_ptr
->Item
);
538 class_index
= variable_get_class(var_ptr
->Item
, rc_class_list
);
539 node_classes
[node_index
] = ra_state
->class_ids
[class_index
];
543 /* Calculate live intervals for input registers */
544 for (inst
= s
->C
->Program
.Instructions
.Next
;
545 inst
!= &s
->C
->Program
.Instructions
;
547 rc_opcode op
= rc_get_flow_control_inst(inst
);
548 if (op
== RC_OPCODE_BGNLOOP
) {
549 struct rc_instruction
* endloop
=
550 rc_match_bgnloop(inst
);
551 if (endloop
->IP
> s
->LoopEnd
) {
552 s
->LoopEnd
= endloop
->IP
;
555 rc_for_all_reads_mask(inst
, scan_read_callback
, s
);
558 /* Compute the writemask for inputs. */
559 for (i
= 0; i
< s
->NumInputs
; i
++) {
560 unsigned int chan
, writemask
= 0;
561 for (chan
= 0; chan
< 4; chan
++) {
562 if (s
->Input
[i
].Live
[chan
].Used
) {
563 writemask
|= (1 << chan
);
566 s
->Input
[i
].Writemask
= writemask
;
569 graph
= ra_alloc_interference_graph(ra_state
->regs
,
570 node_count
+ s
->NumInputs
);
572 for (node_index
= 0; node_index
< node_count
; node_index
++) {
573 ra_set_node_class(graph
, node_index
, node_classes
[node_index
]);
576 /* Build the interference graph */
577 for (var_ptr
= variables
, node_index
= 0; var_ptr
;
578 var_ptr
= var_ptr
->Next
,node_index
++) {
579 struct rc_list
* a
, * b
;
580 unsigned int b_index
;
582 for (a
= var_ptr
, b
= var_ptr
->Next
, b_index
= node_index
+ 1;
583 b
; b
= b
->Next
, b_index
++) {
584 struct rc_variable
* var_a
= a
->Item
;
586 struct rc_variable
* var_b
= b
->Item
;
588 if (overlap_live_intervals_array(var_a
->Live
, var_b
->Live
)) {
589 ra_add_node_interference(graph
,
590 node_index
, b_index
);
592 var_b
= var_b
->Friend
;
594 var_a
= var_a
->Friend
;
599 /* Add input registers to the interference graph */
600 for (i
= 0, input_node
= 0; i
< s
->NumInputs
; i
++) {
601 if (!s
->Input
[i
].Writemask
) {
604 for (var_ptr
= variables
, node_index
= 0;
605 var_ptr
; var_ptr
= var_ptr
->Next
, node_index
++) {
606 struct rc_variable
* var
= var_ptr
->Item
;
607 if (overlap_live_intervals_array(s
->Input
[i
].Live
,
609 ra_add_node_interference(graph
, node_index
,
610 node_count
+ input_node
);
613 /* Manually allocate a register for this input */
614 ra_set_node_reg(graph
, node_count
+ input_node
, get_reg_id(
615 s
->Input
[i
].Index
, s
->Input
[i
].Writemask
));
619 if (!ra_allocate(graph
)) {
620 rc_error(s
->C
, "Ran out of hardware temporaries\n");
624 /* Rewrite the registers */
625 for (var_ptr
= variables
, node_index
= 0; var_ptr
;
626 var_ptr
= var_ptr
->Next
, node_index
++) {
627 int reg
= ra_get_node_reg(graph
, node_index
);
628 unsigned int writemask
= reg_get_writemask(reg
);
629 unsigned int index
= reg_get_index(reg
);
630 struct rc_variable
* var
= var_ptr
->Item
;
632 if (!s
->C
->is_r500
&& var
->Inst
->Type
== RC_INSTRUCTION_NORMAL
) {
633 writemask
= rc_variable_writemask_sum(var
);
636 if (var
->Dst
.File
== RC_FILE_INPUT
) {
639 rc_variable_change_dst(var
, index
, writemask
);
645 void rc_init_regalloc_state(struct rc_regalloc_state
*s
)
647 unsigned i
, j
, index
;
648 unsigned **ra_q_values
;
650 /* Pre-computed q values. This array describes the maximum number of
651 * a class's [row] registers that are in conflict with a single
652 * register from another class [column].
655 * q_values[0][2] is 3, because a register from class 2
656 * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from
657 * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
660 * q_values[2][0] is 1, because a register from class 0
661 * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from
662 * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz
664 * The q values for each register class [row] will never be greater
665 * than the maximum number of writemask combinations for that class.
669 * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination,
670 * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater
673 const unsigned q_values
[RC_REG_CLASS_COUNT
][RC_REG_CLASS_COUNT
] = {
674 {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
675 {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
676 {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
677 {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
678 {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
679 {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
680 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
681 {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
682 {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
683 {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
684 {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
685 {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
686 {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
687 {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
688 {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
689 {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
690 {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
691 {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
692 {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
695 /* Allocate the main ra data structure */
696 s
->regs
= ra_alloc_reg_set(NULL
, R500_PFS_NUM_TEMP_REGS
* RC_MASK_XYZW
,
699 /* Create the register classes */
700 for (i
= 0; i
< RC_REG_CLASS_COUNT
; i
++) {
701 const struct rc_class
*class = &rc_class_list
[i
];
702 s
->class_ids
[class->ID
] = ra_alloc_reg_class(s
->regs
);
704 /* Assign registers to the classes */
705 for (index
= 0; index
< R500_PFS_NUM_TEMP_REGS
; index
++) {
706 for (j
= 0; j
< class->WritemaskCount
; j
++) {
707 int reg_id
= get_reg_id(index
,
708 class->Writemasks
[j
]);
709 ra_class_add_reg(s
->regs
,
710 s
->class_ids
[class->ID
], reg_id
);
715 /* Set the q values. The q_values array is indexed based on
716 * the rc_reg_class ID (RC_REG_CLASS_*) which might be
717 * different than the ID assigned to that class by ra.
718 * This why we need to manually construct this list.
720 ra_q_values
= MALLOC(RC_REG_CLASS_COUNT
* sizeof(unsigned *));
722 for (i
= 0; i
< RC_REG_CLASS_COUNT
; i
++) {
723 ra_q_values
[i
] = MALLOC(RC_REG_CLASS_COUNT
* sizeof(unsigned));
724 for (j
= 0; j
< RC_REG_CLASS_COUNT
; j
++) {
725 ra_q_values
[s
->class_ids
[i
]][s
->class_ids
[j
]] =
730 /* Add register conflicts */
731 add_register_conflicts(s
->regs
, R500_PFS_NUM_TEMP_REGS
);
733 ra_set_finalize(s
->regs
, ra_q_values
);
735 for (i
= 0; i
< RC_REG_CLASS_COUNT
; i
++) {
736 FREE(ra_q_values
[i
]);
741 void rc_destroy_regalloc_state(struct rc_regalloc_state
*s
)
743 ralloc_free(s
->regs
);
747 * @param user This parameter should be a pointer to an integer value. If this
748 * integer value is zero, then a simple register allocator will be used that
749 * only allocates space for input registers (\sa do_regalloc_inputs_only). If
750 * user is non-zero, then the regular register allocator will be used
753 void rc_pair_regalloc(struct radeon_compiler
*cc
, void *user
)
755 struct r300_fragment_program_compiler
*c
=
756 (struct r300_fragment_program_compiler
*)cc
;
757 struct regalloc_state s
;
758 int * do_full_regalloc
= (int*)user
;
760 memset(&s
, 0, sizeof(s
));
762 s
.NumInputs
= rc_get_max_index(cc
, RC_FILE_INPUT
) + 1;
763 s
.Input
= memory_pool_malloc(&cc
->Pool
,
764 s
.NumInputs
* sizeof(struct register_info
));
765 memset(s
.Input
, 0, s
.NumInputs
* sizeof(struct register_info
));
767 s
.NumTemporaries
= rc_get_max_index(cc
, RC_FILE_TEMPORARY
) + 1;
768 s
.Temporary
= memory_pool_malloc(&cc
->Pool
,
769 s
.NumTemporaries
* sizeof(struct register_info
));
770 memset(s
.Temporary
, 0, s
.NumTemporaries
* sizeof(struct register_info
));
772 rc_recompute_ips(s
.C
);
774 c
->AllocateHwInputs(c
, &alloc_input_simple
, &s
);
775 if (*do_full_regalloc
) {
776 do_advanced_regalloc(&s
);
779 do_regalloc_inputs_only(&s
);
782 /* Rewrite inputs and if we are doing the simple allocation, rewrite
783 * temporaries too. */
784 for (struct rc_instruction
*inst
= s
.C
->Program
.Instructions
.Next
;
785 inst
!= &s
.C
->Program
.Instructions
;
787 rc_remap_registers(inst
, &remap_register
, &s
);