2 * Copyright (C) 2009 Nicolai Haehnle.
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "radeon_program_pair.h"
32 #include "radeon_compiler.h"
33 #include "radeon_dataflow.h"
38 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
41 struct live_intervals
{
44 struct live_intervals
* Next
;
47 struct register_info
{
48 struct live_intervals Live
;
51 unsigned int Allocated
:1;
53 unsigned int Index
:RC_REGISTER_INDEX_BITS
;
56 struct hardware_register
{
57 struct live_intervals
* Used
;
60 struct regalloc_state
{
61 struct radeon_compiler
* C
;
63 struct register_info Input
[RC_REGISTER_MAX_INDEX
];
64 struct register_info Temporary
[RC_REGISTER_MAX_INDEX
];
66 struct hardware_register
* HwTemporary
;
67 unsigned int NumHwTemporaries
;
69 * If an instruction is inside of a loop, end_loop will be the
70 * IP of the ENDLOOP instruction, otherwise end_loop will be 0
75 static void print_live_intervals(struct live_intervals
* src
)
83 DBG("(%i,%i)", src
->Start
, src
->End
);
88 static void add_live_intervals(struct regalloc_state
* s
,
89 struct live_intervals
** dst
, struct live_intervals
* src
)
91 struct live_intervals
** dst_backup
= dst
;
94 DBG("add_live_intervals: ");
95 print_live_intervals(*dst
);
97 print_live_intervals(src
);
102 if (*dst
&& (*dst
)->End
< src
->Start
) {
104 } else if (!*dst
|| (*dst
)->Start
> src
->End
) {
105 struct live_intervals
* li
= memory_pool_malloc(&s
->C
->Pool
, sizeof(*li
));
106 li
->Start
= src
->Start
;
112 if (src
->End
> (*dst
)->End
)
113 (*dst
)->End
= src
->End
;
114 if (src
->Start
< (*dst
)->Start
)
115 (*dst
)->Start
= src
->Start
;
122 print_live_intervals(*dst_backup
);
127 static int overlap_live_intervals(struct live_intervals
* dst
, struct live_intervals
* src
)
130 DBG("overlap_live_intervals: ");
131 print_live_intervals(dst
);
133 print_live_intervals(src
);
138 if (dst
->End
<= src
->Start
) {
140 } else if (dst
->End
<= src
->End
) {
143 } else if (dst
->Start
< src
->End
) {
151 DBG(" no overlap\n");
156 static int try_add_live_intervals(struct regalloc_state
* s
,
157 struct live_intervals
** dst
, struct live_intervals
* src
)
159 if (overlap_live_intervals(*dst
, src
))
162 add_live_intervals(s
, dst
, src
);
166 static void scan_callback(void * data
, struct rc_instruction
* inst
,
167 rc_register_file file
, unsigned int index
, unsigned int mask
)
169 struct regalloc_state
* s
= data
;
170 struct register_info
* reg
;
172 if (file
== RC_FILE_TEMPORARY
)
173 reg
= &s
->Temporary
[index
];
174 else if (file
== RC_FILE_INPUT
)
175 reg
= &s
->Input
[index
];
181 if (file
== RC_FILE_INPUT
)
182 reg
->Live
.Start
= -1;
184 reg
->Live
.Start
= inst
->IP
;
185 reg
->Live
.End
= inst
->IP
;
186 } else if (s
->end_loop
)
187 reg
->Live
.End
= s
->end_loop
;
188 else if (inst
->IP
> reg
->Live
.End
)
189 reg
->Live
.End
= inst
->IP
;
192 static void compute_live_intervals(struct radeon_compiler
*c
,
193 struct regalloc_state
*s
)
195 memset(s
, 0, sizeof(*s
));
197 s
->NumHwTemporaries
= c
->max_temp_regs
;
199 memory_pool_malloc(&c
->Pool
,
200 s
->NumHwTemporaries
* sizeof(struct hardware_register
));
201 memset(s
->HwTemporary
, 0, s
->NumHwTemporaries
* sizeof(struct hardware_register
));
203 rc_recompute_ips(s
->C
);
205 for(struct rc_instruction
* inst
= s
->C
->Program
.Instructions
.Next
;
206 inst
!= &s
->C
->Program
.Instructions
;
209 /* For all instructions inside of a loop, the ENDLOOP
210 * instruction is used as the end of the live interval. */
211 if (inst
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
&& !s
->end_loop
) {
213 struct rc_instruction
* tmp
;
214 for(tmp
= inst
->Next
;
215 tmp
!= &s
->C
->Program
.Instructions
;
217 if (tmp
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
) {
219 } else if (tmp
->U
.I
.Opcode
220 == RC_OPCODE_ENDLOOP
) {
222 s
->end_loop
= tmp
->IP
;
229 if (inst
->IP
== s
->end_loop
)
232 rc_for_all_reads_mask(inst
, scan_callback
, s
);
233 rc_for_all_writes_mask(inst
, scan_callback
, s
);
237 static void remap_register(void * data
, struct rc_instruction
* inst
,
238 rc_register_file
* file
, unsigned int * index
)
240 struct regalloc_state
* s
= data
;
241 const struct register_info
* reg
;
243 if (*file
== RC_FILE_TEMPORARY
)
244 reg
= &s
->Temporary
[*index
];
245 else if (*file
== RC_FILE_INPUT
)
246 reg
= &s
->Input
[*index
];
250 if (reg
->Allocated
) {
256 static void do_regalloc(struct regalloc_state
* s
)
258 /* Simple and stupid greedy register allocation */
259 for(unsigned int index
= 0; index
< RC_REGISTER_MAX_INDEX
; ++index
) {
260 struct register_info
* reg
= &s
->Temporary
[index
];
265 for(unsigned int hwreg
= 0; hwreg
< s
->NumHwTemporaries
; ++hwreg
) {
266 if (try_add_live_intervals(s
, &s
->HwTemporary
[hwreg
].Used
, ®
->Live
)) {
268 reg
->File
= RC_FILE_TEMPORARY
;
274 rc_error(s
->C
, "Ran out of hardware temporaries\n");
280 /* Rewrite all instructions based on the translation table we built */
281 for(struct rc_instruction
* inst
= s
->C
->Program
.Instructions
.Next
;
282 inst
!= &s
->C
->Program
.Instructions
;
284 rc_remap_registers(inst
, &remap_register
, s
);
288 static void alloc_input(void * data
, unsigned int input
, unsigned int hwreg
)
290 struct regalloc_state
* s
= data
;
292 if (!s
->Input
[input
].Used
)
295 add_live_intervals(s
, &s
->HwTemporary
[hwreg
].Used
, &s
->Input
[input
].Live
);
297 s
->Input
[input
].Allocated
= 1;
298 s
->Input
[input
].File
= RC_FILE_TEMPORARY
;
299 s
->Input
[input
].Index
= hwreg
;
303 void rc_pair_regalloc(struct radeon_compiler
*cc
, void *user
)
305 struct r300_fragment_program_compiler
*c
= (struct r300_fragment_program_compiler
*)cc
;
306 struct regalloc_state s
;
308 compute_live_intervals(cc
, &s
);
310 c
->AllocateHwInputs(c
, &alloc_input
, &s
);
315 /* This functions offsets the temporary register indices by the number
316 * of input registers, because input registers are actually temporaries and
317 * should not occupy the same space.
319 * This pass is supposed to be used to maintain correct allocation of inputs
320 * if the standard register allocation is disabled. */
321 void rc_pair_regalloc_inputs_only(struct radeon_compiler
*cc
, void *user
)
323 struct r300_fragment_program_compiler
*c
= (struct r300_fragment_program_compiler
*)cc
;
324 struct regalloc_state s
;
327 compute_live_intervals(cc
, &s
);
329 c
->AllocateHwInputs(c
, &alloc_input
, &s
);
332 for (unsigned i
= 0; i
< RC_REGISTER_MAX_INDEX
; i
++) {
333 if (s
.Input
[i
].Allocated
&& temp_reg_offset
<= s
.Input
[i
].Index
)
334 temp_reg_offset
= s
.Input
[i
].Index
+ 1;
337 if (temp_reg_offset
) {
338 for (unsigned i
= 0; i
< RC_REGISTER_MAX_INDEX
; i
++) {
339 if (s
.Temporary
[i
].Used
) {
340 s
.Temporary
[i
].Allocated
= 1;
341 s
.Temporary
[i
].File
= RC_FILE_TEMPORARY
;
342 s
.Temporary
[i
].Index
= i
+ temp_reg_offset
;
346 /* Rewrite all registers. */
347 for (struct rc_instruction
*inst
= cc
->Program
.Instructions
.Next
;
348 inst
!= &cc
->Program
.Instructions
;
350 rc_remap_registers(inst
, &remap_register
, &s
);