util: Gather some common macros
[mesa.git] / src / gallium / drivers / r300 / compiler / radeon_pair_regalloc.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_program_pair.h"
30
31 #include <stdio.h>
32
33 #include "main/glheader.h"
34 #include "program/register_allocate.h"
35 #include "util/u_memory.h"
36 #include "util/ralloc.h"
37
38 #include "r300_fragprog_swizzle.h"
39 #include "radeon_compiler.h"
40 #include "radeon_compiler_util.h"
41 #include "radeon_dataflow.h"
42 #include "radeon_list.h"
43 #include "radeon_regalloc.h"
44 #include "radeon_variable.h"
45
/* Compile-time switch: set to a non-zero value to enable tracing on stderr. */
#define VERBOSE 0

/* printf-style trace helper; prints nothing unless VERBOSE is non-zero. */
#define DBG(...) \
	do { \
		if (VERBOSE) { \
			fprintf(stderr, __VA_ARGS__); \
		} \
	} while (0)
49
50
51
52 struct register_info {
53 struct live_intervals Live[4];
54
55 unsigned int Used:1;
56 unsigned int Allocated:1;
57 unsigned int File:3;
58 unsigned int Index:RC_REGISTER_INDEX_BITS;
59 unsigned int Writemask;
60 };
61
/** Working state shared by the passes of one register allocation run. */
struct regalloc_state {
	/** Compiler whose program is being allocated. */
	struct radeon_compiler * C;

	/** Per-input bookkeeping, NumInputs entries. */
	struct register_info * Input;
	unsigned int NumInputs;

	/** Per-temporary bookkeeping, NumTemporaries entries. */
	struct register_info * Temporary;
	unsigned int NumTemporaries;

	/** Non-zero when only the inputs-only allocation is performed;
	 * temporaries are remapped only in that mode. */
	unsigned int Simple;
	/** Largest IP of any loop end seen so far; input live intervals
	 * are extended at least this far. */
	int LoopEnd;
};
74
75 struct rc_class {
76 enum rc_reg_class ID;
77
78 unsigned int WritemaskCount;
79
80 /** List of writemasks that belong to this class */
81 unsigned int Writemasks[3];
82
83
84 };
85
86 static const struct rc_class rc_class_list [] = {
87 {RC_REG_CLASS_SINGLE, 3,
88 {RC_MASK_X,
89 RC_MASK_Y,
90 RC_MASK_Z}},
91 {RC_REG_CLASS_DOUBLE, 3,
92 {RC_MASK_X | RC_MASK_Y,
93 RC_MASK_X | RC_MASK_Z,
94 RC_MASK_Y | RC_MASK_Z}},
95 {RC_REG_CLASS_TRIPLE, 1,
96 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
97 RC_MASK_NONE,
98 RC_MASK_NONE}},
99 {RC_REG_CLASS_ALPHA, 1,
100 {RC_MASK_W,
101 RC_MASK_NONE,
102 RC_MASK_NONE}},
103 {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3,
104 {RC_MASK_X | RC_MASK_W,
105 RC_MASK_Y | RC_MASK_W,
106 RC_MASK_Z | RC_MASK_W}},
107 {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3,
108 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
109 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
110 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
111 {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1,
112 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
113 RC_MASK_NONE,
114 RC_MASK_NONE}},
115 {RC_REG_CLASS_X, 1,
116 {RC_MASK_X,
117 RC_MASK_NONE,
118 RC_MASK_NONE}},
119 {RC_REG_CLASS_Y, 1,
120 {RC_MASK_Y,
121 RC_MASK_NONE,
122 RC_MASK_NONE}},
123 {RC_REG_CLASS_Z, 1,
124 {RC_MASK_Z,
125 RC_MASK_NONE,
126 RC_MASK_NONE}},
127 {RC_REG_CLASS_XY, 1,
128 {RC_MASK_X | RC_MASK_Y,
129 RC_MASK_NONE,
130 RC_MASK_NONE}},
131 {RC_REG_CLASS_YZ, 1,
132 {RC_MASK_Y | RC_MASK_Z,
133 RC_MASK_NONE,
134 RC_MASK_NONE}},
135 {RC_REG_CLASS_XZ, 1,
136 {RC_MASK_X | RC_MASK_Z,
137 RC_MASK_NONE,
138 RC_MASK_NONE}},
139 {RC_REG_CLASS_XW, 1,
140 {RC_MASK_X | RC_MASK_W,
141 RC_MASK_NONE,
142 RC_MASK_NONE}},
143 {RC_REG_CLASS_YW, 1,
144 {RC_MASK_Y | RC_MASK_W,
145 RC_MASK_NONE,
146 RC_MASK_NONE}},
147 {RC_REG_CLASS_ZW, 1,
148 {RC_MASK_Z | RC_MASK_W,
149 RC_MASK_NONE,
150 RC_MASK_NONE}},
151 {RC_REG_CLASS_XYW, 1,
152 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
153 RC_MASK_NONE,
154 RC_MASK_NONE}},
155 {RC_REG_CLASS_YZW, 1,
156 {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
157 RC_MASK_NONE,
158 RC_MASK_NONE}},
159 {RC_REG_CLASS_XZW, 1,
160 {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
161 RC_MASK_NONE,
162 RC_MASK_NONE}}
163 };
164
165 static void print_live_intervals(struct live_intervals * src)
166 {
167 if (!src || !src->Used) {
168 DBG("(null)");
169 return;
170 }
171
172 DBG("(%i,%i)", src->Start, src->End);
173 }
174
175 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
176 {
177 if (VERBOSE) {
178 DBG("overlap_live_intervals: ");
179 print_live_intervals(a);
180 DBG(" to ");
181 print_live_intervals(b);
182 DBG("\n");
183 }
184
185 if (!a->Used || !b->Used) {
186 DBG(" unused interval\n");
187 return 0;
188 }
189
190 if (a->Start > b->Start) {
191 if (a->Start < b->End) {
192 DBG(" overlap\n");
193 return 1;
194 }
195 } else if (b->Start > a->Start) {
196 if (b->Start < a->End) {
197 DBG(" overlap\n");
198 return 1;
199 }
200 } else { /* a->Start == b->Start */
201 if (a->Start != a->End && b->Start != b->End) {
202 DBG(" overlap\n");
203 return 1;
204 }
205 }
206
207 DBG(" no overlap\n");
208
209 return 0;
210 }
211
212 static void scan_read_callback(void * data, struct rc_instruction * inst,
213 rc_register_file file, unsigned int index, unsigned int mask)
214 {
215 struct regalloc_state * s = data;
216 struct register_info * reg;
217 unsigned int i;
218
219 if (file != RC_FILE_INPUT)
220 return;
221
222 s->Input[index].Used = 1;
223 reg = &s->Input[index];
224
225 for (i = 0; i < 4; i++) {
226 if (!((mask >> i) & 0x1)) {
227 continue;
228 }
229 reg->Live[i].Used = 1;
230 reg->Live[i].Start = 0;
231 reg->Live[i].End =
232 s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
233 }
234 }
235
236 static void remap_register(void * data, struct rc_instruction * inst,
237 rc_register_file * file, unsigned int * index)
238 {
239 struct regalloc_state * s = data;
240 const struct register_info * reg;
241
242 if (*file == RC_FILE_TEMPORARY && s->Simple)
243 reg = &s->Temporary[*index];
244 else if (*file == RC_FILE_INPUT)
245 reg = &s->Input[*index];
246 else
247 return;
248
249 if (reg->Allocated) {
250 *index = reg->Index;
251 }
252 }
253
254 static void alloc_input_simple(void * data, unsigned int input,
255 unsigned int hwreg)
256 {
257 struct regalloc_state * s = data;
258
259 if (input >= s->NumInputs)
260 return;
261
262 s->Input[input].Allocated = 1;
263 s->Input[input].File = RC_FILE_TEMPORARY;
264 s->Input[input].Index = hwreg;
265 }
266
267 /* This functions offsets the temporary register indices by the number
268 * of input registers, because input registers are actually temporaries and
269 * should not occupy the same space.
270 *
271 * This pass is supposed to be used to maintain correct allocation of inputs
272 * if the standard register allocation is disabled. */
273 static void do_regalloc_inputs_only(struct regalloc_state * s)
274 {
275 for (unsigned i = 0; i < s->NumTemporaries; i++) {
276 s->Temporary[i].Allocated = 1;
277 s->Temporary[i].File = RC_FILE_TEMPORARY;
278 s->Temporary[i].Index = i + s->NumInputs;
279 }
280 }
281
282 static unsigned int is_derivative(rc_opcode op)
283 {
284 return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
285 }
286
287 static int find_class(
288 const struct rc_class * classes,
289 unsigned int writemask,
290 unsigned int max_writemask_count)
291 {
292 unsigned int i;
293 for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
294 unsigned int j;
295 if (classes[i].WritemaskCount > max_writemask_count) {
296 continue;
297 }
298 for (j = 0; j < 3; j++) {
299 if (classes[i].Writemasks[j] == writemask) {
300 return i;
301 }
302 }
303 }
304 return -1;
305 }
306
/** User data handed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
	/** Flag cleared by the callback when a swizzle would become
	 * non-native. */
	unsigned int * can_change_writemask;
	/** Swizzle conversion applied to every source argument checked. */
	unsigned int conversion_swizzle;
};
311
312 static void variable_get_class_read_cb(
313 void * userdata,
314 struct rc_instruction * inst,
315 struct rc_pair_instruction_arg * arg,
316 struct rc_pair_instruction_source * src)
317 {
318 struct variable_get_class_cb_data * d = userdata;
319 unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
320 d->conversion_swizzle);
321 if (!r300_swizzle_is_native_basic(new_swizzle)) {
322 *d->can_change_writemask = 0;
323 }
324 }
325
326 static enum rc_reg_class variable_get_class(
327 struct rc_variable * variable,
328 const struct rc_class * classes)
329 {
330 unsigned int i;
331 unsigned int can_change_writemask= 1;
332 unsigned int writemask = rc_variable_writemask_sum(variable);
333 struct rc_list * readers = rc_variable_readers_union(variable);
334 int class_index;
335
336 if (!variable->C->is_r500) {
337 struct rc_class c;
338 struct rc_variable * var_ptr;
339 /* The assumption here is that if an instruction has type
340 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
341 * r300 and r400 can't swizzle the result of a TEX lookup. */
342 for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
343 if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
344 writemask = RC_MASK_XYZW;
345 }
346 }
347
348 /* Check if it is possible to do swizzle packing for r300/r400
349 * without creating non-native swizzles. */
350 class_index = find_class(classes, writemask, 3);
351 if (class_index < 0) {
352 goto error;
353 }
354 c = classes[class_index];
355 if (c.WritemaskCount == 1) {
356 goto done;
357 }
358 for (i = 0; i < c.WritemaskCount; i++) {
359 struct rc_variable * var_ptr;
360 for (var_ptr = variable; var_ptr;
361 var_ptr = var_ptr->Friend) {
362 int j;
363 unsigned int conversion_swizzle =
364 rc_make_conversion_swizzle(
365 writemask, c.Writemasks[i]);
366 struct variable_get_class_cb_data d;
367 d.can_change_writemask = &can_change_writemask;
368 d.conversion_swizzle = conversion_swizzle;
369 /* If we get this far var_ptr->Inst has to
370 * be a pair instruction. If variable or any
371 * of its friends are normal instructions,
372 * then the writemask will be set to RC_MASK_XYZW
373 * and the function will return before it gets
374 * here. */
375 rc_pair_for_all_reads_arg(var_ptr->Inst,
376 variable_get_class_read_cb, &d);
377
378 for (j = 0; j < var_ptr->ReaderCount; j++) {
379 unsigned int old_swizzle;
380 unsigned int new_swizzle;
381 struct rc_reader r = var_ptr->Readers[j];
382 if (r.Inst->Type ==
383 RC_INSTRUCTION_PAIR ) {
384 old_swizzle = r.U.P.Arg->Swizzle;
385 } else {
386 /* Source operands of TEX
387 * instructions can't be
388 * swizzle on r300/r400 GPUs.
389 */
390 if (!variable->C->is_r500) {
391 can_change_writemask = 0;
392 break;
393 }
394 old_swizzle = r.U.I.Src->Swizzle;
395 }
396 new_swizzle = rc_adjust_channels(
397 old_swizzle, conversion_swizzle);
398 if (!r300_swizzle_is_native_basic(
399 new_swizzle)) {
400 can_change_writemask = 0;
401 break;
402 }
403 }
404 if (!can_change_writemask) {
405 break;
406 }
407 }
408 if (!can_change_writemask) {
409 break;
410 }
411 }
412 }
413
414 if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
415 /* DDX/DDY seem to always fail when their writemasks are
416 * changed.*/
417 if (is_derivative(variable->Inst->U.P.RGB.Opcode)
418 || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
419 can_change_writemask = 0;
420 }
421 }
422 for ( ; readers; readers = readers->Next) {
423 struct rc_reader * r = readers->Item;
424 if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
425 if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
426 can_change_writemask = 0;
427 break;
428 }
429 /* DDX/DDY also fail when their swizzles are changed. */
430 if (is_derivative(r->Inst->U.P.RGB.Opcode)
431 || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
432 can_change_writemask = 0;
433 break;
434 }
435 }
436 }
437
438 class_index = find_class(classes, writemask,
439 can_change_writemask ? 3 : 1);
440 done:
441 if (class_index > -1) {
442 return classes[class_index].ID;
443 } else {
444 error:
445 rc_error(variable->C,
446 "Could not find class for index=%u mask=%u\n",
447 variable->Dst.Index, writemask);
448 return 0;
449 }
450 }
451
452 static unsigned int overlap_live_intervals_array(
453 struct live_intervals * a,
454 struct live_intervals * b)
455 {
456 unsigned int a_chan, b_chan;
457 for (a_chan = 0; a_chan < 4; a_chan++) {
458 for (b_chan = 0; b_chan < 4; b_chan++) {
459 if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
460 return 1;
461 }
462 }
463 }
464 return 0;
465 }
466
467 static unsigned int reg_get_index(int reg)
468 {
469 return reg / RC_MASK_XYZW;
470 }
471
472 static unsigned int reg_get_writemask(int reg)
473 {
474 return (reg % RC_MASK_XYZW) + 1;
475 }
476
477 static int get_reg_id(unsigned int index, unsigned int writemask)
478 {
479 assert(writemask);
480 if (writemask == 0) {
481 return 0;
482 }
483 return (index * RC_MASK_XYZW) + (writemask - 1);
484 }
485
#if VERBOSE
/** Print an allocator register id to stderr as "Temp[index].xyzw", with '_'
 * standing in for channels that are not part of the writemask. */
static void print_reg(int reg)
{
	const unsigned int index = reg_get_index(reg);
	const unsigned int mask = reg_get_writemask(reg);
	const char x = (mask & RC_MASK_X) ? 'x' : '_';
	const char y = (mask & RC_MASK_Y) ? 'y' : '_';
	const char z = (mask & RC_MASK_Z) ? 'z' : '_';
	const char w = (mask & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", index, x, y, z, w);
}
#endif
498
499 static void add_register_conflicts(
500 struct ra_regs * regs,
501 unsigned int max_temp_regs)
502 {
503 unsigned int index, a_mask, b_mask;
504 for (index = 0; index < max_temp_regs; index++) {
505 for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
506 for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
507 b_mask++) {
508 if (a_mask & b_mask) {
509 ra_add_reg_conflict(regs,
510 get_reg_id(index, a_mask),
511 get_reg_id(index, b_mask));
512 }
513 }
514 }
515 }
516 }
517
518 static void do_advanced_regalloc(struct regalloc_state * s)
519 {
520
521 unsigned int i, input_node, node_count, node_index;
522 unsigned int * node_classes;
523 struct rc_instruction * inst;
524 struct rc_list * var_ptr;
525 struct rc_list * variables;
526 struct ra_graph * graph;
527 const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
528
529 /* Get list of program variables */
530 variables = rc_get_variables(s->C);
531 node_count = rc_list_count(variables);
532 node_classes = memory_pool_malloc(&s->C->Pool,
533 node_count * sizeof(unsigned int));
534
535 for (var_ptr = variables, node_index = 0; var_ptr;
536 var_ptr = var_ptr->Next, node_index++) {
537 unsigned int class_index;
538 /* Compute the live intervals */
539 rc_variable_compute_live_intervals(var_ptr->Item);
540
541 class_index = variable_get_class(var_ptr->Item, rc_class_list);
542 node_classes[node_index] = ra_state->class_ids[class_index];
543 }
544
545
546 /* Calculate live intervals for input registers */
547 for (inst = s->C->Program.Instructions.Next;
548 inst != &s->C->Program.Instructions;
549 inst = inst->Next) {
550 rc_opcode op = rc_get_flow_control_inst(inst);
551 if (op == RC_OPCODE_BGNLOOP) {
552 struct rc_instruction * endloop =
553 rc_match_bgnloop(inst);
554 if (endloop->IP > s->LoopEnd) {
555 s->LoopEnd = endloop->IP;
556 }
557 }
558 rc_for_all_reads_mask(inst, scan_read_callback, s);
559 }
560
561 /* Compute the writemask for inputs. */
562 for (i = 0; i < s->NumInputs; i++) {
563 unsigned int chan, writemask = 0;
564 for (chan = 0; chan < 4; chan++) {
565 if (s->Input[i].Live[chan].Used) {
566 writemask |= (1 << chan);
567 }
568 }
569 s->Input[i].Writemask = writemask;
570 }
571
572 graph = ra_alloc_interference_graph(ra_state->regs,
573 node_count + s->NumInputs);
574
575 /* Build the interference graph */
576 for (var_ptr = variables, node_index = 0; var_ptr;
577 var_ptr = var_ptr->Next,node_index++) {
578 struct rc_list * a, * b;
579 unsigned int b_index;
580
581 ra_set_node_class(graph, node_index, node_classes[node_index]);
582
583 for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
584 b; b = b->Next, b_index++) {
585 struct rc_variable * var_a = a->Item;
586 while (var_a) {
587 struct rc_variable * var_b = b->Item;
588 while (var_b) {
589 if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
590 ra_add_node_interference(graph,
591 node_index, b_index);
592 }
593 var_b = var_b->Friend;
594 }
595 var_a = var_a->Friend;
596 }
597 }
598 }
599
600 /* Add input registers to the interference graph */
601 for (i = 0, input_node = 0; i< s->NumInputs; i++) {
602 if (!s->Input[i].Writemask) {
603 continue;
604 }
605 for (var_ptr = variables, node_index = 0;
606 var_ptr; var_ptr = var_ptr->Next, node_index++) {
607 struct rc_variable * var = var_ptr->Item;
608 if (overlap_live_intervals_array(s->Input[i].Live,
609 var->Live)) {
610 ra_add_node_interference(graph, node_index,
611 node_count + input_node);
612 }
613 }
614 /* Manually allocate a register for this input */
615 ra_set_node_reg(graph, node_count + input_node, get_reg_id(
616 s->Input[i].Index, s->Input[i].Writemask));
617 input_node++;
618 }
619
620 if (!ra_allocate_no_spills(graph)) {
621 rc_error(s->C, "Ran out of hardware temporaries\n");
622 return;
623 }
624
625 /* Rewrite the registers */
626 for (var_ptr = variables, node_index = 0; var_ptr;
627 var_ptr = var_ptr->Next, node_index++) {
628 int reg = ra_get_node_reg(graph, node_index);
629 unsigned int writemask = reg_get_writemask(reg);
630 unsigned int index = reg_get_index(reg);
631 struct rc_variable * var = var_ptr->Item;
632
633 if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
634 writemask = rc_variable_writemask_sum(var);
635 }
636
637 if (var->Dst.File == RC_FILE_INPUT) {
638 continue;
639 }
640 rc_variable_change_dst(var, index, writemask);
641 }
642
643 ralloc_free(graph);
644 }
645
646 void rc_init_regalloc_state(struct rc_regalloc_state *s)
647 {
648 unsigned i, j, index;
649 unsigned **ra_q_values;
650
651 /* Pre-computed q values. This array describes the maximum number of
652 * a class's [row] registers that are in conflict with a single
653 * register from another class [column].
654 *
655 * For example:
656 * q_values[0][2] is 3, because a register from class 2
657 * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from
658 * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
659 * and T0.z.
660 *
661 * q_values[2][0] is 1, because a register from class 0
662 * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from
663 * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz
664 *
665 * The q values for each register class [row] will never be greater
666 * than the maximum number of writemask combinations for that class.
667 *
668 * For example:
669 *
670 * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination,
671 * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater
672 * than 1.
673 */
674 const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = {
675 {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
676 {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
677 {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
678 {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
679 {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
680 {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
681 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
682 {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
683 {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
684 {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
685 {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
686 {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
687 {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
688 {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
689 {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
690 {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
691 {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
692 {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
693 {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
694 };
695
696 /* Allocate the main ra data structure */
697 s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW);
698
699 /* Create the register classes */
700 for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
701 const struct rc_class *class = &rc_class_list[i];
702 s->class_ids[class->ID] = ra_alloc_reg_class(s->regs);
703
704 /* Assign registers to the classes */
705 for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) {
706 for (j = 0; j < class->WritemaskCount; j++) {
707 int reg_id = get_reg_id(index,
708 class->Writemasks[j]);
709 ra_class_add_reg(s->regs,
710 s->class_ids[class->ID], reg_id);
711 }
712 }
713 }
714
715 /* Set the q values. The q_values array is indexed based on
716 * the rc_reg_class ID (RC_REG_CLASS_*) which might be
717 * different than the ID assigned to that class by ra.
718 * This why we need to manually construct this list.
719 */
720 ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *));
721
722 for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
723 ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned));
724 for (j = 0; j < RC_REG_CLASS_COUNT; j++) {
725 ra_q_values[s->class_ids[i]][s->class_ids[j]] =
726 q_values[i][j];
727 }
728 }
729
730 /* Add register conflicts */
731 add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS);
732
733 ra_set_finalize(s->regs, ra_q_values);
734
735 for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
736 FREE(ra_q_values[i]);
737 }
738 FREE(ra_q_values);
739 }
740
741 void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
742 {
743 ralloc_free(s->regs);
744 }
745
746 /**
747 * @param user This parameter should be a pointer to an integer value. If this
748 * integer value is zero, then a simple register allocator will be used that
749 * only allocates space for input registers (\sa do_regalloc_inputs_only). If
750 * user is non-zero, then the regular register allocator will be used
751 * (\sa do_regalloc).
752 */
753 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
754 {
755 struct r300_fragment_program_compiler *c =
756 (struct r300_fragment_program_compiler*)cc;
757 struct regalloc_state s;
758 int * do_full_regalloc = (int*)user;
759
760 memset(&s, 0, sizeof(s));
761 s.C = cc;
762 s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
763 s.Input = memory_pool_malloc(&cc->Pool,
764 s.NumInputs * sizeof(struct register_info));
765 memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
766
767 s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
768 s.Temporary = memory_pool_malloc(&cc->Pool,
769 s.NumTemporaries * sizeof(struct register_info));
770 memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
771
772 rc_recompute_ips(s.C);
773
774 c->AllocateHwInputs(c, &alloc_input_simple, &s);
775 if (*do_full_regalloc) {
776 do_advanced_regalloc(&s);
777 } else {
778 s.Simple = 1;
779 do_regalloc_inputs_only(&s);
780 }
781
782 /* Rewrite inputs and if we are doing the simple allocation, rewrite
783 * temporaries too. */
784 for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
785 inst != &s.C->Program.Instructions;
786 inst = inst->Next) {
787 rc_remap_registers(inst, &remap_register, &s);
788 }
789 }