Merge branch 'llvm-cliptest-viewport'
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_pair_regalloc.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_pair.h"
29
30 #include <stdio.h>
31
32 #include "radeon_compiler.h"
33 #include "radeon_dataflow.h"
34
35
36 #define VERBOSE 0
37
38 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
39
40
41 struct live_intervals {
42 int Start;
43 int End;
44 struct live_intervals * Next;
45 };
46
47 struct register_info {
48 struct live_intervals Live;
49
50 unsigned int Used:1;
51 unsigned int Allocated:1;
52 unsigned int File:3;
53 unsigned int Index:RC_REGISTER_INDEX_BITS;
54 };
55
56 struct hardware_register {
57 struct live_intervals * Used;
58 };
59
60 struct regalloc_state {
61 struct radeon_compiler * C;
62
63 struct register_info Input[RC_REGISTER_MAX_INDEX];
64 struct register_info Temporary[RC_REGISTER_MAX_INDEX];
65
66 struct hardware_register * HwTemporary;
67 unsigned int NumHwTemporaries;
68 /**
69 * If an instruction is inside of a loop, end_loop will be the
70 * IP of the ENDLOOP instruction, otherwise end_loop will be 0
71 */
72 int end_loop;
73 };
74
75 static void print_live_intervals(struct live_intervals * src)
76 {
77 if (!src) {
78 DBG("(null)");
79 return;
80 }
81
82 while(src) {
83 DBG("(%i,%i)", src->Start, src->End);
84 src = src->Next;
85 }
86 }
87
88 static void add_live_intervals(struct regalloc_state * s,
89 struct live_intervals ** dst, struct live_intervals * src)
90 {
91 struct live_intervals ** dst_backup = dst;
92
93 if (VERBOSE) {
94 DBG("add_live_intervals: ");
95 print_live_intervals(*dst);
96 DBG(" to ");
97 print_live_intervals(src);
98 DBG("\n");
99 }
100
101 while(src) {
102 if (*dst && (*dst)->End < src->Start) {
103 dst = &(*dst)->Next;
104 } else if (!*dst || (*dst)->Start > src->End) {
105 struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
106 li->Start = src->Start;
107 li->End = src->End;
108 li->Next = *dst;
109 *dst = li;
110 src = src->Next;
111 } else {
112 if (src->End > (*dst)->End)
113 (*dst)->End = src->End;
114 if (src->Start < (*dst)->Start)
115 (*dst)->Start = src->Start;
116 src = src->Next;
117 }
118 }
119
120 if (VERBOSE) {
121 DBG(" result: ");
122 print_live_intervals(*dst_backup);
123 DBG("\n");
124 }
125 }
126
127 static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
128 {
129 if (VERBOSE) {
130 DBG("overlap_live_intervals: ");
131 print_live_intervals(dst);
132 DBG(" to ");
133 print_live_intervals(src);
134 DBG("\n");
135 }
136
137 while(src && dst) {
138 if (dst->End <= src->Start) {
139 dst = dst->Next;
140 } else if (dst->End <= src->End) {
141 DBG(" overlap\n");
142 return 1;
143 } else if (dst->Start < src->End) {
144 DBG(" overlap\n");
145 return 1;
146 } else {
147 src = src->Next;
148 }
149 }
150
151 DBG(" no overlap\n");
152
153 return 0;
154 }
155
156 static int try_add_live_intervals(struct regalloc_state * s,
157 struct live_intervals ** dst, struct live_intervals * src)
158 {
159 if (overlap_live_intervals(*dst, src))
160 return 0;
161
162 add_live_intervals(s, dst, src);
163 return 1;
164 }
165
166 static void scan_callback(void * data, struct rc_instruction * inst,
167 rc_register_file file, unsigned int index, unsigned int mask)
168 {
169 struct regalloc_state * s = data;
170 struct register_info * reg;
171
172 if (file == RC_FILE_TEMPORARY)
173 reg = &s->Temporary[index];
174 else if (file == RC_FILE_INPUT)
175 reg = &s->Input[index];
176 else
177 return;
178
179 if (!reg->Used) {
180 reg->Used = 1;
181 if (file == RC_FILE_INPUT)
182 reg->Live.Start = -1;
183 else
184 reg->Live.Start = inst->IP;
185 reg->Live.End = inst->IP;
186 } else if (s->end_loop)
187 reg->Live.End = s->end_loop;
188 else if (inst->IP > reg->Live.End)
189 reg->Live.End = inst->IP;
190 }
191
192 static void compute_live_intervals(struct radeon_compiler *c,
193 struct regalloc_state *s)
194 {
195 memset(s, 0, sizeof(*s));
196 s->C = c;
197 s->NumHwTemporaries = c->max_temp_regs;
198 s->HwTemporary =
199 memory_pool_malloc(&c->Pool,
200 s->NumHwTemporaries * sizeof(struct hardware_register));
201 memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register));
202
203 rc_recompute_ips(s->C);
204
205 for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
206 inst != &s->C->Program.Instructions;
207 inst = inst->Next) {
208
209 /* For all instructions inside of a loop, the ENDLOOP
210 * instruction is used as the end of the live interval. */
211 if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) {
212 int loops = 1;
213 struct rc_instruction * tmp;
214 for(tmp = inst->Next;
215 tmp != &s->C->Program.Instructions;
216 tmp = tmp->Next) {
217 if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
218 loops++;
219 } else if (tmp->U.I.Opcode
220 == RC_OPCODE_ENDLOOP) {
221 if(!--loops) {
222 s->end_loop = tmp->IP;
223 break;
224 }
225 }
226 }
227 }
228
229 if (inst->IP == s->end_loop)
230 s->end_loop = 0;
231
232 rc_for_all_reads_mask(inst, scan_callback, s);
233 rc_for_all_writes_mask(inst, scan_callback, s);
234 }
235 }
236
237 static void remap_register(void * data, struct rc_instruction * inst,
238 rc_register_file * file, unsigned int * index)
239 {
240 struct regalloc_state * s = data;
241 const struct register_info * reg;
242
243 if (*file == RC_FILE_TEMPORARY)
244 reg = &s->Temporary[*index];
245 else if (*file == RC_FILE_INPUT)
246 reg = &s->Input[*index];
247 else
248 return;
249
250 if (reg->Allocated) {
251 *file = reg->File;
252 *index = reg->Index;
253 }
254 }
255
256 static void do_regalloc(struct regalloc_state * s)
257 {
258 /* Simple and stupid greedy register allocation */
259 for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
260 struct register_info * reg = &s->Temporary[index];
261
262 if (!reg->Used)
263 continue;
264
265 for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
266 if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) {
267 reg->Allocated = 1;
268 reg->File = RC_FILE_TEMPORARY;
269 reg->Index = hwreg;
270 goto success;
271 }
272 }
273
274 rc_error(s->C, "Ran out of hardware temporaries\n");
275 return;
276
277 success:;
278 }
279
280 /* Rewrite all instructions based on the translation table we built */
281 for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
282 inst != &s->C->Program.Instructions;
283 inst = inst->Next) {
284 rc_remap_registers(inst, &remap_register, s);
285 }
286 }
287
288 static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
289 {
290 struct regalloc_state * s = data;
291
292 if (!s->Input[input].Used)
293 return;
294
295 add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
296
297 s->Input[input].Allocated = 1;
298 s->Input[input].File = RC_FILE_TEMPORARY;
299 s->Input[input].Index = hwreg;
300
301 }
302
303 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
304 {
305 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
306 struct regalloc_state s;
307
308 compute_live_intervals(cc, &s);
309
310 c->AllocateHwInputs(c, &alloc_input, &s);
311
312 do_regalloc(&s);
313 }
314
315 /* This functions offsets the temporary register indices by the number
316 * of input registers, because input registers are actually temporaries and
317 * should not occupy the same space.
318 *
319 * This pass is supposed to be used to maintain correct allocation of inputs
320 * if the standard register allocation is disabled. */
321 void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user)
322 {
323 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
324 struct regalloc_state s;
325 int temp_reg_offset;
326
327 compute_live_intervals(cc, &s);
328
329 c->AllocateHwInputs(c, &alloc_input, &s);
330
331 temp_reg_offset = 0;
332 for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
333 if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index)
334 temp_reg_offset = s.Input[i].Index + 1;
335 }
336
337 if (temp_reg_offset) {
338 for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
339 if (s.Temporary[i].Used) {
340 s.Temporary[i].Allocated = 1;
341 s.Temporary[i].File = RC_FILE_TEMPORARY;
342 s.Temporary[i].Index = i + temp_reg_offset;
343 }
344 }
345
346 /* Rewrite all registers. */
347 for (struct rc_instruction *inst = cc->Program.Instructions.Next;
348 inst != &cc->Program.Instructions;
349 inst = inst->Next) {
350 rc_remap_registers(inst, &remap_register, &s);
351 }
352 }
353 }