r300/compiler: refactor fragment shader compilation
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_pair_regalloc.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_pair.h"
29
30 #include <stdio.h>
31
32 #include "radeon_compiler.h"
33 #include "radeon_dataflow.h"
34
35
36 #define VERBOSE 0
37
38 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
39
40
41 struct live_intervals {
42 int Start;
43 int End;
44 struct live_intervals * Next;
45 };
46
47 struct register_info {
48 struct live_intervals Live;
49
50 unsigned int Used:1;
51 unsigned int Allocated:1;
52 unsigned int File:3;
53 unsigned int Index:RC_REGISTER_INDEX_BITS;
54 };
55
56 struct hardware_register {
57 struct live_intervals * Used;
58 };
59
60 struct regalloc_state {
61 struct radeon_compiler * C;
62
63 struct register_info Input[RC_REGISTER_MAX_INDEX];
64 struct register_info Temporary[RC_REGISTER_MAX_INDEX];
65
66 struct hardware_register * HwTemporary;
67 unsigned int NumHwTemporaries;
68 /**
69 * If an instruction is inside of a loop, end_loop will be the
70 * IP of the ENDLOOP instruction, otherwise end_loop will be 0
71 */
72 int end_loop;
73 };
74
75 static void print_live_intervals(struct live_intervals * src)
76 {
77 if (!src) {
78 DBG("(null)");
79 return;
80 }
81
82 while(src) {
83 DBG("(%i,%i)", src->Start, src->End);
84 src = src->Next;
85 }
86 }
87
88 static void add_live_intervals(struct regalloc_state * s,
89 struct live_intervals ** dst, struct live_intervals * src)
90 {
91 struct live_intervals ** dst_backup = dst;
92
93 if (VERBOSE) {
94 DBG("add_live_intervals: ");
95 print_live_intervals(*dst);
96 DBG(" to ");
97 print_live_intervals(src);
98 DBG("\n");
99 }
100
101 while(src) {
102 if (*dst && (*dst)->End < src->Start) {
103 dst = &(*dst)->Next;
104 } else if (!*dst || (*dst)->Start > src->End) {
105 struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
106 li->Start = src->Start;
107 li->End = src->End;
108 li->Next = *dst;
109 *dst = li;
110 src = src->Next;
111 } else {
112 if (src->End > (*dst)->End)
113 (*dst)->End = src->End;
114 if (src->Start < (*dst)->Start)
115 (*dst)->Start = src->Start;
116 src = src->Next;
117 }
118 }
119
120 if (VERBOSE) {
121 DBG(" result: ");
122 print_live_intervals(*dst_backup);
123 DBG("\n");
124 }
125 }
126
127 static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
128 {
129 if (VERBOSE) {
130 DBG("overlap_live_intervals: ");
131 print_live_intervals(dst);
132 DBG(" to ");
133 print_live_intervals(src);
134 DBG("\n");
135 }
136
137 while(src && dst) {
138 if (dst->End <= src->Start) {
139 dst = dst->Next;
140 } else if (dst->End <= src->End) {
141 DBG(" overlap\n");
142 return 1;
143 } else if (dst->Start < src->End) {
144 DBG(" overlap\n");
145 return 1;
146 } else {
147 src = src->Next;
148 }
149 }
150
151 DBG(" no overlap\n");
152
153 return 0;
154 }
155
156 static int try_add_live_intervals(struct regalloc_state * s,
157 struct live_intervals ** dst, struct live_intervals * src)
158 {
159 if (overlap_live_intervals(*dst, src))
160 return 0;
161
162 add_live_intervals(s, dst, src);
163 return 1;
164 }
165
166 static void scan_callback(void * data, struct rc_instruction * inst,
167 rc_register_file file, unsigned int index, unsigned int mask)
168 {
169 struct regalloc_state * s = data;
170 struct register_info * reg;
171
172 if (file == RC_FILE_TEMPORARY)
173 reg = &s->Temporary[index];
174 else if (file == RC_FILE_INPUT)
175 reg = &s->Input[index];
176 else
177 return;
178
179 if (!reg->Used) {
180 reg->Used = 1;
181 if (file == RC_FILE_INPUT)
182 reg->Live.Start = -1;
183 else
184 reg->Live.Start = inst->IP;
185 reg->Live.End = inst->IP;
186 } else if (s->end_loop)
187 reg->Live.End = s->end_loop;
188 else if (inst->IP > reg->Live.End)
189 reg->Live.End = inst->IP;
190 }
191
192 static void compute_live_intervals(struct regalloc_state * s)
193 {
194 rc_recompute_ips(s->C);
195
196 for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
197 inst != &s->C->Program.Instructions;
198 inst = inst->Next) {
199
200 /* For all instructions inside of a loop, the ENDLOOP
201 * instruction is used as the end of the live interval. */
202 if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) {
203 int loops = 1;
204 struct rc_instruction * tmp;
205 for(tmp = inst->Next;
206 tmp != &s->C->Program.Instructions;
207 tmp = tmp->Next) {
208 if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
209 loops++;
210 break;
211 } else if (tmp->U.I.Opcode
212 == RC_OPCODE_ENDLOOP) {
213 if(!--loops) {
214 s->end_loop = tmp->IP;
215 break;
216 }
217 }
218 }
219 }
220
221 if (inst->IP == s->end_loop)
222 s->end_loop = 0;
223
224 rc_for_all_reads_mask(inst, scan_callback, s);
225 rc_for_all_writes_mask(inst, scan_callback, s);
226 }
227 }
228
229 static void remap_register(void * data, struct rc_instruction * inst,
230 rc_register_file * file, unsigned int * index)
231 {
232 struct regalloc_state * s = data;
233 const struct register_info * reg;
234
235 if (*file == RC_FILE_TEMPORARY)
236 reg = &s->Temporary[*index];
237 else if (*file == RC_FILE_INPUT)
238 reg = &s->Input[*index];
239 else
240 return;
241
242 if (reg->Allocated) {
243 *file = reg->File;
244 *index = reg->Index;
245 }
246 }
247
248 static void do_regalloc(struct regalloc_state * s)
249 {
250 /* Simple and stupid greedy register allocation */
251 for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
252 struct register_info * reg = &s->Temporary[index];
253
254 if (!reg->Used)
255 continue;
256
257 for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
258 if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) {
259 reg->Allocated = 1;
260 reg->File = RC_FILE_TEMPORARY;
261 reg->Index = hwreg;
262 goto success;
263 }
264 }
265
266 rc_error(s->C, "Ran out of hardware temporaries\n");
267 return;
268
269 success:;
270 }
271
272 /* Rewrite all instructions based on the translation table we built */
273 for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
274 inst != &s->C->Program.Instructions;
275 inst = inst->Next) {
276 rc_remap_registers(inst, &remap_register, s);
277 }
278 }
279
280 static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
281 {
282 struct regalloc_state * s = data;
283
284 if (!s->Input[input].Used)
285 return;
286
287 add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
288
289 s->Input[input].Allocated = 1;
290 s->Input[input].File = RC_FILE_TEMPORARY;
291 s->Input[input].Index = hwreg;
292
293 }
294
295 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
296 {
297 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
298 unsigned maxtemps = c->Base.max_temp_regs;
299 struct regalloc_state s;
300
301 memset(&s, 0, sizeof(s));
302 s.C = &c->Base;
303 s.NumHwTemporaries = maxtemps;
304 s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register));
305 memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register));
306
307 compute_live_intervals(&s);
308
309 c->AllocateHwInputs(c, &alloc_input, &s);
310
311 do_regalloc(&s);
312 }