gallium: Add capability for ARB_robust_buffer_access_behavior.
[mesa.git] / src / gallium / drivers / r300 / compiler / radeon_pair_regalloc.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_program_pair.h"
30
31 #include <stdio.h>
32
33 #include "main/glheader.h"
34 #include "util/register_allocate.h"
35 #include "util/u_memory.h"
36 #include "util/ralloc.h"
37
38 #include "r300_fragprog_swizzle.h"
39 #include "radeon_compiler.h"
40 #include "radeon_compiler_util.h"
41 #include "radeon_dataflow.h"
42 #include "radeon_list.h"
43 #include "radeon_regalloc.h"
44 #include "radeon_variable.h"
45
/* Compile-time switch for the allocator's debug trace (0 = silent). */
#define VERBOSE 0

/* printf-style trace to stderr; the call is skipped while VERBOSE is 0. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while (0)
49
50
51
52 struct register_info {
53 struct live_intervals Live[4];
54
55 unsigned int Used:1;
56 unsigned int Allocated:1;
57 unsigned int File:3;
58 unsigned int Index:RC_REGISTER_INDEX_BITS;
59 unsigned int Writemask;
60 };
61
/* Working state shared by the allocation passes in this file. */
struct regalloc_state {
	struct radeon_compiler *C;

	/* Per-input bookkeeping; NumInputs entries. */
	struct register_info *Input;
	unsigned NumInputs;

	/* Per-temporary bookkeeping; NumTemporaries entries. */
	struct register_info *Temporary;
	unsigned NumTemporaries;

	/* Non-zero when only the inputs-only allocation is performed, in which
	 * case temporaries are remapped through Temporary[] as well. */
	unsigned Simple;
	/* Largest IP of any ENDLOOP seen so far; extends input live ranges. */
	int LoopEnd;
};
74
75 struct rc_class {
76 enum rc_reg_class ID;
77
78 unsigned int WritemaskCount;
79
80 /** List of writemasks that belong to this class */
81 unsigned int Writemasks[3];
82
83
84 };
85
86 static const struct rc_class rc_class_list [] = {
87 {RC_REG_CLASS_SINGLE, 3,
88 {RC_MASK_X,
89 RC_MASK_Y,
90 RC_MASK_Z}},
91 {RC_REG_CLASS_DOUBLE, 3,
92 {RC_MASK_X | RC_MASK_Y,
93 RC_MASK_X | RC_MASK_Z,
94 RC_MASK_Y | RC_MASK_Z}},
95 {RC_REG_CLASS_TRIPLE, 1,
96 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
97 RC_MASK_NONE,
98 RC_MASK_NONE}},
99 {RC_REG_CLASS_ALPHA, 1,
100 {RC_MASK_W,
101 RC_MASK_NONE,
102 RC_MASK_NONE}},
103 {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3,
104 {RC_MASK_X | RC_MASK_W,
105 RC_MASK_Y | RC_MASK_W,
106 RC_MASK_Z | RC_MASK_W}},
107 {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3,
108 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
109 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
110 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
111 {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1,
112 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
113 RC_MASK_NONE,
114 RC_MASK_NONE}},
115 {RC_REG_CLASS_X, 1,
116 {RC_MASK_X,
117 RC_MASK_NONE,
118 RC_MASK_NONE}},
119 {RC_REG_CLASS_Y, 1,
120 {RC_MASK_Y,
121 RC_MASK_NONE,
122 RC_MASK_NONE}},
123 {RC_REG_CLASS_Z, 1,
124 {RC_MASK_Z,
125 RC_MASK_NONE,
126 RC_MASK_NONE}},
127 {RC_REG_CLASS_XY, 1,
128 {RC_MASK_X | RC_MASK_Y,
129 RC_MASK_NONE,
130 RC_MASK_NONE}},
131 {RC_REG_CLASS_YZ, 1,
132 {RC_MASK_Y | RC_MASK_Z,
133 RC_MASK_NONE,
134 RC_MASK_NONE}},
135 {RC_REG_CLASS_XZ, 1,
136 {RC_MASK_X | RC_MASK_Z,
137 RC_MASK_NONE,
138 RC_MASK_NONE}},
139 {RC_REG_CLASS_XW, 1,
140 {RC_MASK_X | RC_MASK_W,
141 RC_MASK_NONE,
142 RC_MASK_NONE}},
143 {RC_REG_CLASS_YW, 1,
144 {RC_MASK_Y | RC_MASK_W,
145 RC_MASK_NONE,
146 RC_MASK_NONE}},
147 {RC_REG_CLASS_ZW, 1,
148 {RC_MASK_Z | RC_MASK_W,
149 RC_MASK_NONE,
150 RC_MASK_NONE}},
151 {RC_REG_CLASS_XYW, 1,
152 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
153 RC_MASK_NONE,
154 RC_MASK_NONE}},
155 {RC_REG_CLASS_YZW, 1,
156 {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
157 RC_MASK_NONE,
158 RC_MASK_NONE}},
159 {RC_REG_CLASS_XZW, 1,
160 {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
161 RC_MASK_NONE,
162 RC_MASK_NONE}}
163 };
164
165 static void print_live_intervals(struct live_intervals * src)
166 {
167 if (!src || !src->Used) {
168 DBG("(null)");
169 return;
170 }
171
172 DBG("(%i,%i)", src->Start, src->End);
173 }
174
175 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
176 {
177 if (VERBOSE) {
178 DBG("overlap_live_intervals: ");
179 print_live_intervals(a);
180 DBG(" to ");
181 print_live_intervals(b);
182 DBG("\n");
183 }
184
185 if (!a->Used || !b->Used) {
186 DBG(" unused interval\n");
187 return 0;
188 }
189
190 if (a->Start > b->Start) {
191 if (a->Start < b->End) {
192 DBG(" overlap\n");
193 return 1;
194 }
195 } else if (b->Start > a->Start) {
196 if (b->Start < a->End) {
197 DBG(" overlap\n");
198 return 1;
199 }
200 } else { /* a->Start == b->Start */
201 if (a->Start != a->End && b->Start != b->End) {
202 DBG(" overlap\n");
203 return 1;
204 }
205 }
206
207 DBG(" no overlap\n");
208
209 return 0;
210 }
211
212 static void scan_read_callback(void * data, struct rc_instruction * inst,
213 rc_register_file file, unsigned int index, unsigned int mask)
214 {
215 struct regalloc_state * s = data;
216 struct register_info * reg;
217 unsigned int i;
218
219 if (file != RC_FILE_INPUT)
220 return;
221
222 s->Input[index].Used = 1;
223 reg = &s->Input[index];
224
225 for (i = 0; i < 4; i++) {
226 if (!((mask >> i) & 0x1)) {
227 continue;
228 }
229 reg->Live[i].Used = 1;
230 reg->Live[i].Start = 0;
231 reg->Live[i].End =
232 s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
233 }
234 }
235
236 static void remap_register(void * data, struct rc_instruction * inst,
237 rc_register_file * file, unsigned int * index)
238 {
239 struct regalloc_state * s = data;
240 const struct register_info * reg;
241
242 if (*file == RC_FILE_TEMPORARY && s->Simple)
243 reg = &s->Temporary[*index];
244 else if (*file == RC_FILE_INPUT)
245 reg = &s->Input[*index];
246 else
247 return;
248
249 if (reg->Allocated) {
250 *index = reg->Index;
251 }
252 }
253
254 static void alloc_input_simple(void * data, unsigned int input,
255 unsigned int hwreg)
256 {
257 struct regalloc_state * s = data;
258
259 if (input >= s->NumInputs)
260 return;
261
262 s->Input[input].Allocated = 1;
263 s->Input[input].File = RC_FILE_TEMPORARY;
264 s->Input[input].Index = hwreg;
265 }
266
267 /* This functions offsets the temporary register indices by the number
268 * of input registers, because input registers are actually temporaries and
269 * should not occupy the same space.
270 *
271 * This pass is supposed to be used to maintain correct allocation of inputs
272 * if the standard register allocation is disabled. */
273 static void do_regalloc_inputs_only(struct regalloc_state * s)
274 {
275 for (unsigned i = 0; i < s->NumTemporaries; i++) {
276 s->Temporary[i].Allocated = 1;
277 s->Temporary[i].File = RC_FILE_TEMPORARY;
278 s->Temporary[i].Index = i + s->NumInputs;
279 }
280 }
281
282 static unsigned int is_derivative(rc_opcode op)
283 {
284 return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
285 }
286
287 static int find_class(
288 const struct rc_class * classes,
289 unsigned int writemask,
290 unsigned int max_writemask_count)
291 {
292 unsigned int i;
293 for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
294 unsigned int j;
295 if (classes[i].WritemaskCount > max_writemask_count) {
296 continue;
297 }
298 for (j = 0; j < 3; j++) {
299 if (classes[i].Writemasks[j] == writemask) {
300 return i;
301 }
302 }
303 }
304 return -1;
305 }
306
/* User data handed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
	/* Cleared by the callback when a rewritten swizzle is not native. */
	unsigned * can_change_writemask;
	/* Conversion swizzle applied to each argument swizzle before testing. */
	unsigned conversion_swizzle;
};
311
312 static void variable_get_class_read_cb(
313 void * userdata,
314 struct rc_instruction * inst,
315 struct rc_pair_instruction_arg * arg,
316 struct rc_pair_instruction_source * src)
317 {
318 struct variable_get_class_cb_data * d = userdata;
319 unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
320 d->conversion_swizzle);
321 if (!r300_swizzle_is_native_basic(new_swizzle)) {
322 *d->can_change_writemask = 0;
323 }
324 }
325
326 static enum rc_reg_class variable_get_class(
327 struct rc_variable * variable,
328 const struct rc_class * classes)
329 {
330 unsigned int i;
331 unsigned int can_change_writemask= 1;
332 unsigned int writemask = rc_variable_writemask_sum(variable);
333 struct rc_list * readers = rc_variable_readers_union(variable);
334 int class_index;
335
336 if (!variable->C->is_r500) {
337 struct rc_class c;
338 struct rc_variable * var_ptr;
339 /* The assumption here is that if an instruction has type
340 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
341 * r300 and r400 can't swizzle the result of a TEX lookup. */
342 for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
343 if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
344 writemask = RC_MASK_XYZW;
345 }
346 }
347
348 /* Check if it is possible to do swizzle packing for r300/r400
349 * without creating non-native swizzles. */
350 class_index = find_class(classes, writemask, 3);
351 if (class_index < 0) {
352 goto error;
353 }
354 c = classes[class_index];
355 if (c.WritemaskCount == 1) {
356 goto done;
357 }
358 for (i = 0; i < c.WritemaskCount; i++) {
359 struct rc_variable * var_ptr;
360 for (var_ptr = variable; var_ptr;
361 var_ptr = var_ptr->Friend) {
362 int j;
363 unsigned int conversion_swizzle =
364 rc_make_conversion_swizzle(
365 writemask, c.Writemasks[i]);
366 struct variable_get_class_cb_data d;
367 d.can_change_writemask = &can_change_writemask;
368 d.conversion_swizzle = conversion_swizzle;
369 /* If we get this far var_ptr->Inst has to
370 * be a pair instruction. If variable or any
371 * of its friends are normal instructions,
372 * then the writemask will be set to RC_MASK_XYZW
373 * and the function will return before it gets
374 * here. */
375 rc_pair_for_all_reads_arg(var_ptr->Inst,
376 variable_get_class_read_cb, &d);
377
378 for (j = 0; j < var_ptr->ReaderCount; j++) {
379 unsigned int old_swizzle;
380 unsigned int new_swizzle;
381 struct rc_reader r = var_ptr->Readers[j];
382 if (r.Inst->Type ==
383 RC_INSTRUCTION_PAIR ) {
384 old_swizzle = r.U.P.Arg->Swizzle;
385 } else {
386 /* Source operands of TEX
387 * instructions can't be
388 * swizzle on r300/r400 GPUs.
389 */
390 can_change_writemask = 0;
391 break;
392 }
393 new_swizzle = rc_adjust_channels(
394 old_swizzle, conversion_swizzle);
395 if (!r300_swizzle_is_native_basic(
396 new_swizzle)) {
397 can_change_writemask = 0;
398 break;
399 }
400 }
401 if (!can_change_writemask) {
402 break;
403 }
404 }
405 if (!can_change_writemask) {
406 break;
407 }
408 }
409 }
410
411 if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
412 /* DDX/DDY seem to always fail when their writemasks are
413 * changed.*/
414 if (is_derivative(variable->Inst->U.P.RGB.Opcode)
415 || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
416 can_change_writemask = 0;
417 }
418 }
419 for ( ; readers; readers = readers->Next) {
420 struct rc_reader * r = readers->Item;
421 if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
422 if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
423 can_change_writemask = 0;
424 break;
425 }
426 /* DDX/DDY also fail when their swizzles are changed. */
427 if (is_derivative(r->Inst->U.P.RGB.Opcode)
428 || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
429 can_change_writemask = 0;
430 break;
431 }
432 }
433 }
434
435 class_index = find_class(classes, writemask,
436 can_change_writemask ? 3 : 1);
437 done:
438 if (class_index > -1) {
439 return classes[class_index].ID;
440 } else {
441 error:
442 rc_error(variable->C,
443 "Could not find class for index=%u mask=%u\n",
444 variable->Dst.Index, writemask);
445 return 0;
446 }
447 }
448
449 static unsigned int overlap_live_intervals_array(
450 struct live_intervals * a,
451 struct live_intervals * b)
452 {
453 unsigned int a_chan, b_chan;
454 for (a_chan = 0; a_chan < 4; a_chan++) {
455 for (b_chan = 0; b_chan < 4; b_chan++) {
456 if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
457 return 1;
458 }
459 }
460 }
461 return 0;
462 }
463
464 static unsigned int reg_get_index(int reg)
465 {
466 return reg / RC_MASK_XYZW;
467 }
468
469 static unsigned int reg_get_writemask(int reg)
470 {
471 return (reg % RC_MASK_XYZW) + 1;
472 }
473
474 static int get_reg_id(unsigned int index, unsigned int writemask)
475 {
476 assert(writemask);
477 if (writemask == 0) {
478 return 0;
479 }
480 return (index * RC_MASK_XYZW) + (writemask - 1);
481 }
482
#if VERBOSE
/* Debug helper: print an allocator register id as "Temp[n].xyzw", with '_'
 * standing in for channels that are not part of the writemask. */
static void print_reg(int reg)
{
	const unsigned int mask = reg_get_writemask(reg);
	const char x = (mask & RC_MASK_X) ? 'x' : '_';
	const char y = (mask & RC_MASK_Y) ? 'y' : '_';
	const char z = (mask & RC_MASK_Z) ? 'z' : '_';
	const char w = (mask & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", reg_get_index(reg), x, y, z, w);
}
#endif
495
496 static void add_register_conflicts(
497 struct ra_regs * regs,
498 unsigned int max_temp_regs)
499 {
500 unsigned int index, a_mask, b_mask;
501 for (index = 0; index < max_temp_regs; index++) {
502 for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
503 for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
504 b_mask++) {
505 if (a_mask & b_mask) {
506 ra_add_reg_conflict(regs,
507 get_reg_id(index, a_mask),
508 get_reg_id(index, b_mask));
509 }
510 }
511 }
512 }
513 }
514
515 static void do_advanced_regalloc(struct regalloc_state * s)
516 {
517
518 unsigned int i, input_node, node_count, node_index;
519 unsigned int * node_classes;
520 struct rc_instruction * inst;
521 struct rc_list * var_ptr;
522 struct rc_list * variables;
523 struct ra_graph * graph;
524 const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
525
526 /* Get list of program variables */
527 variables = rc_get_variables(s->C);
528 node_count = rc_list_count(variables);
529 node_classes = memory_pool_malloc(&s->C->Pool,
530 node_count * sizeof(unsigned int));
531
532 for (var_ptr = variables, node_index = 0; var_ptr;
533 var_ptr = var_ptr->Next, node_index++) {
534 unsigned int class_index;
535 /* Compute the live intervals */
536 rc_variable_compute_live_intervals(var_ptr->Item);
537
538 class_index = variable_get_class(var_ptr->Item, rc_class_list);
539 node_classes[node_index] = ra_state->class_ids[class_index];
540 }
541
542
543 /* Calculate live intervals for input registers */
544 for (inst = s->C->Program.Instructions.Next;
545 inst != &s->C->Program.Instructions;
546 inst = inst->Next) {
547 rc_opcode op = rc_get_flow_control_inst(inst);
548 if (op == RC_OPCODE_BGNLOOP) {
549 struct rc_instruction * endloop =
550 rc_match_bgnloop(inst);
551 if (endloop->IP > s->LoopEnd) {
552 s->LoopEnd = endloop->IP;
553 }
554 }
555 rc_for_all_reads_mask(inst, scan_read_callback, s);
556 }
557
558 /* Compute the writemask for inputs. */
559 for (i = 0; i < s->NumInputs; i++) {
560 unsigned int chan, writemask = 0;
561 for (chan = 0; chan < 4; chan++) {
562 if (s->Input[i].Live[chan].Used) {
563 writemask |= (1 << chan);
564 }
565 }
566 s->Input[i].Writemask = writemask;
567 }
568
569 graph = ra_alloc_interference_graph(ra_state->regs,
570 node_count + s->NumInputs);
571
572 for (node_index = 0; node_index < node_count; node_index++) {
573 ra_set_node_class(graph, node_index, node_classes[node_index]);
574 }
575
576 /* Build the interference graph */
577 for (var_ptr = variables, node_index = 0; var_ptr;
578 var_ptr = var_ptr->Next,node_index++) {
579 struct rc_list * a, * b;
580 unsigned int b_index;
581
582 for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
583 b; b = b->Next, b_index++) {
584 struct rc_variable * var_a = a->Item;
585 while (var_a) {
586 struct rc_variable * var_b = b->Item;
587 while (var_b) {
588 if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
589 ra_add_node_interference(graph,
590 node_index, b_index);
591 }
592 var_b = var_b->Friend;
593 }
594 var_a = var_a->Friend;
595 }
596 }
597 }
598
599 /* Add input registers to the interference graph */
600 for (i = 0, input_node = 0; i< s->NumInputs; i++) {
601 if (!s->Input[i].Writemask) {
602 continue;
603 }
604 for (var_ptr = variables, node_index = 0;
605 var_ptr; var_ptr = var_ptr->Next, node_index++) {
606 struct rc_variable * var = var_ptr->Item;
607 if (overlap_live_intervals_array(s->Input[i].Live,
608 var->Live)) {
609 ra_add_node_interference(graph, node_index,
610 node_count + input_node);
611 }
612 }
613 /* Manually allocate a register for this input */
614 ra_set_node_reg(graph, node_count + input_node, get_reg_id(
615 s->Input[i].Index, s->Input[i].Writemask));
616 input_node++;
617 }
618
619 if (!ra_allocate(graph)) {
620 rc_error(s->C, "Ran out of hardware temporaries\n");
621 return;
622 }
623
624 /* Rewrite the registers */
625 for (var_ptr = variables, node_index = 0; var_ptr;
626 var_ptr = var_ptr->Next, node_index++) {
627 int reg = ra_get_node_reg(graph, node_index);
628 unsigned int writemask = reg_get_writemask(reg);
629 unsigned int index = reg_get_index(reg);
630 struct rc_variable * var = var_ptr->Item;
631
632 if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
633 writemask = rc_variable_writemask_sum(var);
634 }
635
636 if (var->Dst.File == RC_FILE_INPUT) {
637 continue;
638 }
639 rc_variable_change_dst(var, index, writemask);
640 }
641
642 ralloc_free(graph);
643 }
644
645 void rc_init_regalloc_state(struct rc_regalloc_state *s)
646 {
647 unsigned i, j, index;
648 unsigned **ra_q_values;
649
650 /* Pre-computed q values. This array describes the maximum number of
651 * a class's [row] registers that are in conflict with a single
652 * register from another class [column].
653 *
654 * For example:
655 * q_values[0][2] is 3, because a register from class 2
656 * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from
657 * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
658 * and T0.z.
659 *
660 * q_values[2][0] is 1, because a register from class 0
661 * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from
662 * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz
663 *
664 * The q values for each register class [row] will never be greater
665 * than the maximum number of writemask combinations for that class.
666 *
667 * For example:
668 *
669 * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination,
670 * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater
671 * than 1.
672 */
673 const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = {
674 {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
675 {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
676 {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
677 {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
678 {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
679 {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
680 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
681 {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
682 {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
683 {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
684 {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
685 {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
686 {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
687 {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
688 {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
689 {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
690 {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
691 {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
692 {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
693 };
694
695 /* Allocate the main ra data structure */
696 s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW,
697 true);
698
699 /* Create the register classes */
700 for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
701 const struct rc_class *class = &rc_class_list[i];
702 s->class_ids[class->ID] = ra_alloc_reg_class(s->regs);
703
704 /* Assign registers to the classes */
705 for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) {
706 for (j = 0; j < class->WritemaskCount; j++) {
707 int reg_id = get_reg_id(index,
708 class->Writemasks[j]);
709 ra_class_add_reg(s->regs,
710 s->class_ids[class->ID], reg_id);
711 }
712 }
713 }
714
715 /* Set the q values. The q_values array is indexed based on
716 * the rc_reg_class ID (RC_REG_CLASS_*) which might be
717 * different than the ID assigned to that class by ra.
718 * This why we need to manually construct this list.
719 */
720 ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *));
721
722 for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
723 ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned));
724 for (j = 0; j < RC_REG_CLASS_COUNT; j++) {
725 ra_q_values[s->class_ids[i]][s->class_ids[j]] =
726 q_values[i][j];
727 }
728 }
729
730 /* Add register conflicts */
731 add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS);
732
733 ra_set_finalize(s->regs, ra_q_values);
734
735 for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
736 FREE(ra_q_values[i]);
737 }
738 FREE(ra_q_values);
739 }
740
741 void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
742 {
743 ralloc_free(s->regs);
744 }
745
746 /**
747 * @param user This parameter should be a pointer to an integer value. If this
748 * integer value is zero, then a simple register allocator will be used that
749 * only allocates space for input registers (\sa do_regalloc_inputs_only). If
750 * user is non-zero, then the regular register allocator will be used
751 * (\sa do_regalloc).
752 */
753 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
754 {
755 struct r300_fragment_program_compiler *c =
756 (struct r300_fragment_program_compiler*)cc;
757 struct regalloc_state s;
758 int * do_full_regalloc = (int*)user;
759
760 memset(&s, 0, sizeof(s));
761 s.C = cc;
762 s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
763 s.Input = memory_pool_malloc(&cc->Pool,
764 s.NumInputs * sizeof(struct register_info));
765 memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
766
767 s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
768 s.Temporary = memory_pool_malloc(&cc->Pool,
769 s.NumTemporaries * sizeof(struct register_info));
770 memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
771
772 rc_recompute_ips(s.C);
773
774 c->AllocateHwInputs(c, &alloc_input_simple, &s);
775 if (*do_full_regalloc) {
776 do_advanced_regalloc(&s);
777 } else {
778 s.Simple = 1;
779 do_regalloc_inputs_only(&s);
780 }
781
782 /* Rewrite inputs and if we are doing the simple allocation, rewrite
783 * temporaries too. */
784 for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
785 inst != &s.C->Program.Instructions;
786 inst = inst->Next) {
787 rc_remap_registers(inst, &remap_register, &s);
788 }
789 }