r300/compiler: Fix register allocator's handling of loops
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_pair_regalloc.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_pair.h"
29
30 #include <stdio.h>
31
32 #include "radeon_compiler.h"
33 #include "radeon_dataflow.h"
34
35
36 #define VERBOSE 0
37
38 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
39
40
41 struct live_intervals {
42 int Start;
43 int End;
44 struct live_intervals * Next;
45 };
46
47 struct register_info {
48 struct live_intervals Live;
49
50 unsigned int Used:1;
51 unsigned int Allocated:1;
52 unsigned int File:3;
53 unsigned int Index:RC_REGISTER_INDEX_BITS;
54 };
55
56 struct hardware_register {
57 struct live_intervals * Used;
58 };
59
60 struct regalloc_state {
61 struct radeon_compiler * C;
62
63 struct register_info Input[RC_REGISTER_MAX_INDEX];
64 struct register_info Temporary[RC_REGISTER_MAX_INDEX];
65
66 struct hardware_register * HwTemporary;
67 unsigned int NumHwTemporaries;
68 /**
69 * If an instruction is inside of a loop, EndLoop will be the
70 * IP of the ENDLOOP instruction, and BeginLoop will be the IP
71 * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop
72 * will be -1.
73 */
74 int EndLoop;
75 int BeginLoop;
76 };
77
78 static void print_live_intervals(struct live_intervals * src)
79 {
80 if (!src) {
81 DBG("(null)");
82 return;
83 }
84
85 while(src) {
86 DBG("(%i,%i)", src->Start, src->End);
87 src = src->Next;
88 }
89 }
90
91 static void add_live_intervals(struct regalloc_state * s,
92 struct live_intervals ** dst, struct live_intervals * src)
93 {
94 struct live_intervals ** dst_backup = dst;
95
96 if (VERBOSE) {
97 DBG("add_live_intervals: ");
98 print_live_intervals(*dst);
99 DBG(" to ");
100 print_live_intervals(src);
101 DBG("\n");
102 }
103
104 while(src) {
105 if (*dst && (*dst)->End < src->Start) {
106 dst = &(*dst)->Next;
107 } else if (!*dst || (*dst)->Start > src->End) {
108 struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
109 li->Start = src->Start;
110 li->End = src->End;
111 li->Next = *dst;
112 *dst = li;
113 src = src->Next;
114 } else {
115 if (src->End > (*dst)->End)
116 (*dst)->End = src->End;
117 if (src->Start < (*dst)->Start)
118 (*dst)->Start = src->Start;
119 src = src->Next;
120 }
121 }
122
123 if (VERBOSE) {
124 DBG(" result: ");
125 print_live_intervals(*dst_backup);
126 DBG("\n");
127 }
128 }
129
130 static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
131 {
132 if (VERBOSE) {
133 DBG("overlap_live_intervals: ");
134 print_live_intervals(dst);
135 DBG(" to ");
136 print_live_intervals(src);
137 DBG("\n");
138 }
139
140 while(src && dst) {
141 if (dst->End <= src->Start) {
142 dst = dst->Next;
143 } else if (dst->End <= src->End) {
144 DBG(" overlap\n");
145 return 1;
146 } else if (dst->Start < src->End) {
147 DBG(" overlap\n");
148 return 1;
149 } else {
150 src = src->Next;
151 }
152 }
153
154 DBG(" no overlap\n");
155
156 return 0;
157 }
158
159 static int try_add_live_intervals(struct regalloc_state * s,
160 struct live_intervals ** dst, struct live_intervals * src)
161 {
162 if (overlap_live_intervals(*dst, src))
163 return 0;
164
165 add_live_intervals(s, dst, src);
166 return 1;
167 }
168
169 static void scan_callback(void * data, struct rc_instruction * inst,
170 rc_register_file file, unsigned int index, unsigned int mask)
171 {
172 struct regalloc_state * s = data;
173 struct register_info * reg;
174
175 if (file == RC_FILE_TEMPORARY)
176 reg = &s->Temporary[index];
177 else if (file == RC_FILE_INPUT)
178 reg = &s->Input[index];
179 else
180 return;
181
182 if (!reg->Used) {
183 reg->Used = 1;
184 if (file == RC_FILE_INPUT)
185 reg->Live.Start = -1;
186 else if (s->BeginLoop >= 0)
187 reg->Live.Start = s->BeginLoop;
188 else
189 reg->Live.Start = inst->IP;
190 reg->Live.End = inst->IP;
191 } else if (s->EndLoop >= 0)
192 reg->Live.End = s->EndLoop;
193 else if (inst->IP > reg->Live.End)
194 reg->Live.End = inst->IP;
195 }
196
197 static void compute_live_intervals(struct radeon_compiler *c,
198 struct regalloc_state *s)
199 {
200 memset(s, 0, sizeof(*s));
201 s->C = c;
202 s->NumHwTemporaries = c->max_temp_regs;
203 s->BeginLoop = -1;
204 s->EndLoop = -1;
205 s->HwTemporary =
206 memory_pool_malloc(&c->Pool,
207 s->NumHwTemporaries * sizeof(struct hardware_register));
208 memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register));
209
210 rc_recompute_ips(s->C);
211
212 for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
213 inst != &s->C->Program.Instructions;
214 inst = inst->Next) {
215
216 /* For all instructions inside of a loop, the ENDLOOP
217 * instruction is used as the end of the live interval and
218 * the BGNLOOP instruction is used as the beginning. */
219 if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) {
220 s->BeginLoop = inst->IP;
221 int loops = 1;
222 struct rc_instruction * tmp;
223 for(tmp = inst->Next;
224 tmp != &s->C->Program.Instructions;
225 tmp = tmp->Next) {
226 if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
227 loops++;
228 } else if (tmp->U.I.Opcode
229 == RC_OPCODE_ENDLOOP) {
230 if(!--loops) {
231 s->EndLoop = tmp->IP;
232 break;
233 }
234 }
235 }
236 }
237
238 if (inst->IP == s->EndLoop) {
239 s->EndLoop = -1;
240 s->BeginLoop = -1;
241 }
242
243 rc_for_all_reads_mask(inst, scan_callback, s);
244 rc_for_all_writes_mask(inst, scan_callback, s);
245 }
246 }
247
248 static void remap_register(void * data, struct rc_instruction * inst,
249 rc_register_file * file, unsigned int * index)
250 {
251 struct regalloc_state * s = data;
252 const struct register_info * reg;
253
254 if (*file == RC_FILE_TEMPORARY)
255 reg = &s->Temporary[*index];
256 else if (*file == RC_FILE_INPUT)
257 reg = &s->Input[*index];
258 else
259 return;
260
261 if (reg->Allocated) {
262 *file = reg->File;
263 *index = reg->Index;
264 }
265 }
266
267 static void do_regalloc(struct regalloc_state * s)
268 {
269 /* Simple and stupid greedy register allocation */
270 for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
271 struct register_info * reg = &s->Temporary[index];
272
273 if (!reg->Used)
274 continue;
275
276 for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
277 if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) {
278 reg->Allocated = 1;
279 reg->File = RC_FILE_TEMPORARY;
280 reg->Index = hwreg;
281 goto success;
282 }
283 }
284
285 rc_error(s->C, "Ran out of hardware temporaries\n");
286 return;
287
288 success:;
289 }
290
291 /* Rewrite all instructions based on the translation table we built */
292 for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
293 inst != &s->C->Program.Instructions;
294 inst = inst->Next) {
295 rc_remap_registers(inst, &remap_register, s);
296 }
297 }
298
299 static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
300 {
301 struct regalloc_state * s = data;
302
303 if (!s->Input[input].Used)
304 return;
305
306 add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
307
308 s->Input[input].Allocated = 1;
309 s->Input[input].File = RC_FILE_TEMPORARY;
310 s->Input[input].Index = hwreg;
311
312 }
313
314 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
315 {
316 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
317 struct regalloc_state s;
318
319 compute_live_intervals(cc, &s);
320
321 c->AllocateHwInputs(c, &alloc_input, &s);
322
323 do_regalloc(&s);
324 }
325
326 /* This functions offsets the temporary register indices by the number
327 * of input registers, because input registers are actually temporaries and
328 * should not occupy the same space.
329 *
330 * This pass is supposed to be used to maintain correct allocation of inputs
331 * if the standard register allocation is disabled. */
332 void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user)
333 {
334 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
335 struct regalloc_state s;
336 int temp_reg_offset;
337
338 compute_live_intervals(cc, &s);
339
340 c->AllocateHwInputs(c, &alloc_input, &s);
341
342 temp_reg_offset = 0;
343 for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
344 if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index)
345 temp_reg_offset = s.Input[i].Index + 1;
346 }
347
348 if (temp_reg_offset) {
349 for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
350 if (s.Temporary[i].Used) {
351 s.Temporary[i].Allocated = 1;
352 s.Temporary[i].File = RC_FILE_TEMPORARY;
353 s.Temporary[i].Index = i + temp_reg_offset;
354 }
355 }
356
357 /* Rewrite all registers. */
358 for (struct rc_instruction *inst = cc->Program.Instructions.Next;
359 inst != &cc->Program.Instructions;
360 inst = inst->Next) {
361 rc_remap_registers(inst, &remap_register, &s);
362 }
363 }
364 }