r300/compiler: Don't create register classes for inputs
[mesa.git] / src / gallium / drivers / r300 / compiler / radeon_pair_regalloc.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_program_pair.h"
30
31 #include <stdio.h>
32
33 #include "main/glheader.h"
34 #include "program/register_allocate.h"
35 #include "ralloc.h"
36
37 #include "r300_fragprog_swizzle.h"
38 #include "radeon_compiler.h"
39 #include "radeon_compiler_util.h"
40 #include "radeon_dataflow.h"
41 #include "radeon_list.h"
42 #include "radeon_variable.h"
43
44 #define VERBOSE 0
45
46 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
47
48
49
50 struct register_info {
51 struct live_intervals Live[4];
52
53 unsigned int Used:1;
54 unsigned int Allocated:1;
55 unsigned int File:3;
56 unsigned int Index:RC_REGISTER_INDEX_BITS;
57 unsigned int Writemask;
58 };
59
60 struct regalloc_state {
61 struct radeon_compiler * C;
62
63 struct register_info * Input;
64 unsigned int NumInputs;
65
66 struct register_info * Temporary;
67 unsigned int NumTemporaries;
68
69 unsigned int Simple;
70 int LoopEnd;
71 };
72
73 enum rc_reg_class {
74 RC_REG_CLASS_SINGLE,
75 RC_REG_CLASS_DOUBLE,
76 RC_REG_CLASS_TRIPLE,
77 RC_REG_CLASS_ALPHA,
78 RC_REG_CLASS_SINGLE_PLUS_ALPHA,
79 RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
80 RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
81 RC_REG_CLASS_X,
82 RC_REG_CLASS_Y,
83 RC_REG_CLASS_Z,
84 RC_REG_CLASS_XY,
85 RC_REG_CLASS_YZ,
86 RC_REG_CLASS_XZ,
87 RC_REG_CLASS_XW,
88 RC_REG_CLASS_YW,
89 RC_REG_CLASS_ZW,
90 RC_REG_CLASS_XYW,
91 RC_REG_CLASS_YZW,
92 RC_REG_CLASS_XZW,
93 RC_REG_CLASS_COUNT
94 };
95
96 struct rc_class {
97 enum rc_reg_class Class;
98
99 unsigned int WritemaskCount;
100
101 /** This is 1 if this class is being used by the register allocator
102 * and 0 otherwise */
103 unsigned int Used;
104
105 /** This is the ID number assigned to this class by ra. */
106 unsigned int Id;
107
108 /** List of writemasks that belong to this class */
109 unsigned int Writemasks[3];
110
111
112 };
113
114 static void print_live_intervals(struct live_intervals * src)
115 {
116 if (!src || !src->Used) {
117 DBG("(null)");
118 return;
119 }
120
121 DBG("(%i,%i)", src->Start, src->End);
122 }
123
124 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
125 {
126 if (VERBOSE) {
127 DBG("overlap_live_intervals: ");
128 print_live_intervals(a);
129 DBG(" to ");
130 print_live_intervals(b);
131 DBG("\n");
132 }
133
134 if (!a->Used || !b->Used) {
135 DBG(" unused interval\n");
136 return 0;
137 }
138
139 if (a->Start > b->Start) {
140 if (a->Start < b->End) {
141 DBG(" overlap\n");
142 return 1;
143 }
144 } else if (b->Start > a->Start) {
145 if (b->Start < a->End) {
146 DBG(" overlap\n");
147 return 1;
148 }
149 } else { /* a->Start == b->Start */
150 if (a->Start != a->End && b->Start != b->End) {
151 DBG(" overlap\n");
152 return 1;
153 }
154 }
155
156 DBG(" no overlap\n");
157
158 return 0;
159 }
160
161 static void scan_read_callback(void * data, struct rc_instruction * inst,
162 rc_register_file file, unsigned int index, unsigned int mask)
163 {
164 struct regalloc_state * s = data;
165 struct register_info * reg;
166 unsigned int i;
167
168 if (file != RC_FILE_INPUT)
169 return;
170
171 s->Input[index].Used = 1;
172 reg = &s->Input[index];
173
174 for (i = 0; i < 4; i++) {
175 if (!((mask >> i) & 0x1)) {
176 continue;
177 }
178 reg->Live[i].Used = 1;
179 reg->Live[i].Start = 0;
180 reg->Live[i].End =
181 s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
182 }
183 }
184
185 static void remap_register(void * data, struct rc_instruction * inst,
186 rc_register_file * file, unsigned int * index)
187 {
188 struct regalloc_state * s = data;
189 const struct register_info * reg;
190
191 if (*file == RC_FILE_TEMPORARY && s->Simple)
192 reg = &s->Temporary[*index];
193 else if (*file == RC_FILE_INPUT)
194 reg = &s->Input[*index];
195 else
196 return;
197
198 if (reg->Allocated) {
199 *index = reg->Index;
200 }
201 }
202
203 static void alloc_input_simple(void * data, unsigned int input,
204 unsigned int hwreg)
205 {
206 struct regalloc_state * s = data;
207
208 if (input >= s->NumInputs)
209 return;
210
211 s->Input[input].Allocated = 1;
212 s->Input[input].File = RC_FILE_TEMPORARY;
213 s->Input[input].Index = hwreg;
214 }
215
216 /* This functions offsets the temporary register indices by the number
217 * of input registers, because input registers are actually temporaries and
218 * should not occupy the same space.
219 *
220 * This pass is supposed to be used to maintain correct allocation of inputs
221 * if the standard register allocation is disabled. */
222 static void do_regalloc_inputs_only(struct regalloc_state * s)
223 {
224 for (unsigned i = 0; i < s->NumTemporaries; i++) {
225 s->Temporary[i].Allocated = 1;
226 s->Temporary[i].File = RC_FILE_TEMPORARY;
227 s->Temporary[i].Index = i + s->NumInputs;
228 }
229 }
230
231 static unsigned int is_derivative(rc_opcode op)
232 {
233 return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
234 }
235
236 static int find_class(
237 struct rc_class * classes,
238 unsigned int writemask,
239 unsigned int max_writemask_count)
240 {
241 unsigned int i;
242 for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
243 unsigned int j;
244 if (classes[i].WritemaskCount > max_writemask_count) {
245 continue;
246 }
247 for (j = 0; j < 3; j++) {
248 if (classes[i].Writemasks[j] == writemask) {
249 return i;
250 }
251 }
252 }
253 return -1;
254 }
255
256 struct variable_get_class_cb_data {
257 unsigned int * can_change_writemask;
258 unsigned int conversion_swizzle;
259 };
260
261 static void variable_get_class_read_cb(
262 void * userdata,
263 struct rc_instruction * inst,
264 struct rc_pair_instruction_arg * arg,
265 struct rc_pair_instruction_source * src)
266 {
267 struct variable_get_class_cb_data * d = userdata;
268 unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
269 d->conversion_swizzle);
270 if (!r300_swizzle_is_native_basic(new_swizzle)) {
271 *d->can_change_writemask = 0;
272 }
273 }
274
275 static enum rc_reg_class variable_get_class(
276 struct rc_variable * variable,
277 struct rc_class * classes)
278 {
279 unsigned int i;
280 unsigned int can_change_writemask= 1;
281 unsigned int writemask = rc_variable_writemask_sum(variable);
282 struct rc_list * readers = rc_variable_readers_union(variable);
283 int class_index;
284
285 if (!variable->C->is_r500) {
286 struct rc_class c;
287 struct rc_variable * var_ptr;
288 /* The assumption here is that if an instruction has type
289 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
290 * r300 and r400 can't swizzle the result of a TEX lookup. */
291 for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
292 if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
293 writemask = RC_MASK_XYZW;
294 }
295 }
296
297 /* Check if it is possible to do swizzle packing for r300/r400
298 * without creating non-native swizzles. */
299 class_index = find_class(classes, writemask, 3);
300 if (class_index < 0) {
301 goto error;
302 }
303 c = classes[class_index];
304 if (c.WritemaskCount == 1) {
305 goto done;
306 }
307 for (i = 0; i < c.WritemaskCount; i++) {
308 struct rc_variable * var_ptr;
309 for (var_ptr = variable; var_ptr;
310 var_ptr = var_ptr->Friend) {
311 int j;
312 unsigned int conversion_swizzle =
313 rc_make_conversion_swizzle(
314 writemask, c.Writemasks[i]);
315 struct variable_get_class_cb_data d;
316 d.can_change_writemask = &can_change_writemask;
317 d.conversion_swizzle = conversion_swizzle;
318 /* If we get this far var_ptr->Inst has to
319 * be a pair instruction. If variable or any
320 * of its friends are normal instructions,
321 * then the writemask will be set to RC_MASK_XYZW
322 * and the function will return before it gets
323 * here. */
324 rc_pair_for_all_reads_arg(var_ptr->Inst,
325 variable_get_class_read_cb, &d);
326
327 for (j = 0; j < var_ptr->ReaderCount; j++) {
328 unsigned int old_swizzle;
329 unsigned int new_swizzle;
330 struct rc_reader r = var_ptr->Readers[j];
331 if (r.Inst->Type ==
332 RC_INSTRUCTION_PAIR ) {
333 old_swizzle = r.U.P.Arg->Swizzle;
334 } else {
335 old_swizzle = r.U.I.Src->Swizzle;
336 }
337 new_swizzle = rc_adjust_channels(
338 old_swizzle, conversion_swizzle);
339 if (!r300_swizzle_is_native_basic(
340 new_swizzle)) {
341 can_change_writemask = 0;
342 break;
343 }
344 }
345 if (!can_change_writemask) {
346 break;
347 }
348 }
349 if (!can_change_writemask) {
350 break;
351 }
352 }
353 }
354
355 if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
356 /* DDX/DDY seem to always fail when their writemasks are
357 * changed.*/
358 if (is_derivative(variable->Inst->U.P.RGB.Opcode)
359 || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
360 can_change_writemask = 0;
361 }
362 }
363 for ( ; readers; readers = readers->Next) {
364 struct rc_reader * r = readers->Item;
365 if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
366 if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
367 can_change_writemask = 0;
368 break;
369 }
370 /* DDX/DDY also fail when their swizzles are changed. */
371 if (is_derivative(r->Inst->U.P.RGB.Opcode)
372 || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
373 can_change_writemask = 0;
374 break;
375 }
376 }
377 }
378
379 class_index = find_class(classes, writemask,
380 can_change_writemask ? 3 : 1);
381 done:
382 if (class_index > -1) {
383 return classes[class_index].Class;
384 } else {
385 error:
386 rc_error(variable->C,
387 "Could not find class for index=%u mask=%u\n",
388 variable->Dst.Index, writemask);
389 return 0;
390 }
391 }
392
393 static unsigned int overlap_live_intervals_array(
394 struct live_intervals * a,
395 struct live_intervals * b)
396 {
397 unsigned int a_chan, b_chan;
398 for (a_chan = 0; a_chan < 4; a_chan++) {
399 for (b_chan = 0; b_chan < 4; b_chan++) {
400 if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
401 return 1;
402 }
403 }
404 }
405 return 0;
406 }
407
408 static unsigned int reg_get_index(int reg)
409 {
410 return reg / RC_MASK_XYZW;
411 }
412
413 static unsigned int reg_get_writemask(int reg)
414 {
415 return (reg % RC_MASK_XYZW) + 1;
416 }
417
418 static int get_reg_id(unsigned int index, unsigned int writemask)
419 {
420 assert(writemask);
421 if (writemask == 0) {
422 return 0;
423 }
424 return (index * RC_MASK_XYZW) + (writemask - 1);
425 }
426
427 #if VERBOSE
428 static void print_reg(int reg)
429 {
430 unsigned int index = reg_get_index(reg);
431 unsigned int mask = reg_get_writemask(reg);
432 fprintf(stderr, "Temp[%u].%c%c%c%c", index,
433 mask & RC_MASK_X ? 'x' : '_',
434 mask & RC_MASK_Y ? 'y' : '_',
435 mask & RC_MASK_Z ? 'z' : '_',
436 mask & RC_MASK_W ? 'w' : '_');
437 }
438 #endif
439
440 static void add_register_conflicts(
441 struct ra_regs * regs,
442 unsigned int max_temp_regs)
443 {
444 unsigned int index, a_mask, b_mask;
445 for (index = 0; index < max_temp_regs; index++) {
446 for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
447 for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
448 b_mask++) {
449 if (a_mask & b_mask) {
450 ra_add_reg_conflict(regs,
451 get_reg_id(index, a_mask),
452 get_reg_id(index, b_mask));
453 }
454 }
455 }
456 }
457 }
458
459 static void do_advanced_regalloc(struct regalloc_state * s)
460 {
461 struct rc_class rc_class_list [] = {
462 {RC_REG_CLASS_SINGLE, 3, 0, 0,
463 {RC_MASK_X,
464 RC_MASK_Y,
465 RC_MASK_Z}},
466 {RC_REG_CLASS_DOUBLE, 3, 0, 0,
467 {RC_MASK_X | RC_MASK_Y,
468 RC_MASK_X | RC_MASK_Z,
469 RC_MASK_Y | RC_MASK_Z}},
470 {RC_REG_CLASS_TRIPLE, 1, 0, 0,
471 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
472 RC_MASK_NONE,
473 RC_MASK_NONE}},
474 {RC_REG_CLASS_ALPHA, 1, 0, 0,
475 {RC_MASK_W,
476 RC_MASK_NONE,
477 RC_MASK_NONE}},
478 {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
479 {RC_MASK_X | RC_MASK_W,
480 RC_MASK_Y | RC_MASK_W,
481 RC_MASK_Z | RC_MASK_W}},
482 {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
483 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
484 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
485 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
486 {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
487 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
488 RC_MASK_NONE,
489 RC_MASK_NONE}},
490 {RC_REG_CLASS_X, 1, 0, 0,
491 {RC_MASK_X,
492 RC_MASK_NONE,
493 RC_MASK_NONE}},
494 {RC_REG_CLASS_Y, 1, 0, 0,
495 {RC_MASK_Y,
496 RC_MASK_NONE,
497 RC_MASK_NONE}},
498 {RC_REG_CLASS_Z, 1, 0, 0,
499 {RC_MASK_Z,
500 RC_MASK_NONE,
501 RC_MASK_NONE}},
502 {RC_REG_CLASS_XY, 1, 0, 0,
503 {RC_MASK_X | RC_MASK_Y,
504 RC_MASK_NONE,
505 RC_MASK_NONE}},
506 {RC_REG_CLASS_YZ, 1, 0, 0,
507 {RC_MASK_Y | RC_MASK_Z,
508 RC_MASK_NONE,
509 RC_MASK_NONE}},
510 {RC_REG_CLASS_XZ, 1, 0, 0,
511 {RC_MASK_X | RC_MASK_Z,
512 RC_MASK_NONE,
513 RC_MASK_NONE}},
514 {RC_REG_CLASS_XW, 1, 0, 0,
515 {RC_MASK_X | RC_MASK_W,
516 RC_MASK_NONE,
517 RC_MASK_NONE}},
518 {RC_REG_CLASS_YW, 1, 0, 0,
519 {RC_MASK_Y | RC_MASK_W,
520 RC_MASK_NONE,
521 RC_MASK_NONE}},
522 {RC_REG_CLASS_ZW, 1, 0, 0,
523 {RC_MASK_Z | RC_MASK_W,
524 RC_MASK_NONE,
525 RC_MASK_NONE}},
526 {RC_REG_CLASS_XYW, 1, 0, 0,
527 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
528 RC_MASK_NONE,
529 RC_MASK_NONE}},
530 {RC_REG_CLASS_YZW, 1, 0, 0,
531 {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
532 RC_MASK_NONE,
533 RC_MASK_NONE}},
534 {RC_REG_CLASS_XZW, 1, 0, 0,
535 {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
536 RC_MASK_NONE,
537 RC_MASK_NONE}}
538 };
539
540 unsigned int i, j, index, input_node, node_count, node_index;
541 unsigned int * node_classes;
542 struct rc_instruction * inst;
543 struct rc_list * var_ptr;
544 struct rc_list * variables;
545 struct ra_regs * regs;
546 struct ra_graph * graph;
547
548 /* Allocate the main ra data structure */
549 regs = ra_alloc_reg_set(NULL, s->C->max_temp_regs * RC_MASK_XYZW);
550
551 /* Get list of program variables */
552 variables = rc_get_variables(s->C);
553 node_count = rc_list_count(variables);
554 node_classes = memory_pool_malloc(&s->C->Pool,
555 node_count * sizeof(unsigned int));
556
557 for (var_ptr = variables, node_index = 0; var_ptr;
558 var_ptr = var_ptr->Next, node_index++) {
559 unsigned int class_index;
560 /* Compute the live intervals */
561 rc_variable_compute_live_intervals(var_ptr->Item);
562
563 class_index = variable_get_class(var_ptr->Item, rc_class_list);
564
565 /* If we haven't used this register class yet, mark it
566 * as used and allocate space for it. */
567 if (!rc_class_list[class_index].Used) {
568 rc_class_list[class_index].Used = 1;
569 rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
570 }
571
572 node_classes[node_index] = rc_class_list[class_index].Id;
573 }
574
575
576 /* Assign registers to the classes */
577 for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
578 struct rc_class class = rc_class_list[i];
579 if (!class.Used) {
580 continue;
581 }
582
583 for (index = 0; index < s->C->max_temp_regs; index++) {
584 for (j = 0; j < class.WritemaskCount; j++) {
585 int reg_id = get_reg_id(index,
586 class.Writemasks[j]);
587 ra_class_add_reg(regs, class.Id, reg_id);
588 }
589 }
590 }
591
592 /* Add register conflicts */
593 add_register_conflicts(regs, s->C->max_temp_regs);
594
595 /* Calculate live intervals for input registers */
596 for (inst = s->C->Program.Instructions.Next;
597 inst != &s->C->Program.Instructions;
598 inst = inst->Next) {
599 rc_opcode op = rc_get_flow_control_inst(inst);
600 if (op == RC_OPCODE_BGNLOOP) {
601 struct rc_instruction * endloop =
602 rc_match_bgnloop(inst);
603 if (endloop->IP > s->LoopEnd) {
604 s->LoopEnd = endloop->IP;
605 }
606 }
607 rc_for_all_reads_mask(inst, scan_read_callback, s);
608 }
609
610 /* Compute the writemask for inputs. */
611 for (i = 0; i < s->NumInputs; i++) {
612 unsigned int chan, class_id, writemask = 0;
613 for (chan = 0; chan < 4; chan++) {
614 if (s->Input[i].Live[chan].Used) {
615 writemask |= (1 << chan);
616 }
617 }
618 s->Input[i].Writemask = writemask;
619 }
620
621 ra_set_finalize(regs, NULL);
622
623 graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
624
625 /* Build the interference graph */
626 for (var_ptr = variables, node_index = 0; var_ptr;
627 var_ptr = var_ptr->Next,node_index++) {
628 struct rc_list * a, * b;
629 unsigned int b_index;
630
631 ra_set_node_class(graph, node_index, node_classes[node_index]);
632
633 for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
634 b; b = b->Next, b_index++) {
635 struct rc_variable * var_a = a->Item;
636 while (var_a) {
637 struct rc_variable * var_b = b->Item;
638 while (var_b) {
639 if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
640 ra_add_node_interference(graph,
641 node_index, b_index);
642 }
643 var_b = var_b->Friend;
644 }
645 var_a = var_a->Friend;
646 }
647 }
648 }
649
650 /* Add input registers to the interference graph */
651 for (i = 0, input_node = 0; i< s->NumInputs; i++) {
652 if (!s->Input[i].Writemask) {
653 continue;
654 }
655 for (var_ptr = variables, node_index = 0;
656 var_ptr; var_ptr = var_ptr->Next, node_index++) {
657 struct rc_variable * var = var_ptr->Item;
658 if (overlap_live_intervals_array(s->Input[i].Live,
659 var->Live)) {
660 ra_add_node_interference(graph, node_index,
661 node_count + input_node);
662 }
663 }
664 /* Manually allocate a register for this input */
665 ra_set_node_reg(graph, node_count + input_node, get_reg_id(
666 s->Input[i].Index, s->Input[i].Writemask));
667 input_node++;
668 }
669
670 if (!ra_allocate_no_spills(graph)) {
671 rc_error(s->C, "Ran out of hardware temporaries\n");
672 return;
673 }
674
675 /* Rewrite the registers */
676 for (var_ptr = variables, node_index = 0; var_ptr;
677 var_ptr = var_ptr->Next, node_index++) {
678 int reg = ra_get_node_reg(graph, node_index);
679 unsigned int writemask = reg_get_writemask(reg);
680 unsigned int index = reg_get_index(reg);
681 struct rc_variable * var = var_ptr->Item;
682
683 if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
684 writemask = rc_variable_writemask_sum(var);
685 }
686
687 if (var->Dst.File == RC_FILE_INPUT) {
688 continue;
689 }
690 rc_variable_change_dst(var, index, writemask);
691 }
692
693 ralloc_free(graph);
694 ralloc_free(regs);
695 }
696
697 /**
698 * @param user This parameter should be a pointer to an integer value. If this
699 * integer value is zero, then a simple register allocator will be used that
700 * only allocates space for input registers (\sa do_regalloc_inputs_only). If
701 * user is non-zero, then the regular register allocator will be used
702 * (\sa do_regalloc).
703 */
704 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
705 {
706 struct r300_fragment_program_compiler *c =
707 (struct r300_fragment_program_compiler*)cc;
708 struct regalloc_state s;
709 int * do_full_regalloc = (int*)user;
710
711 memset(&s, 0, sizeof(s));
712 s.C = cc;
713 s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
714 s.Input = memory_pool_malloc(&cc->Pool,
715 s.NumInputs * sizeof(struct register_info));
716 memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
717
718 s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
719 s.Temporary = memory_pool_malloc(&cc->Pool,
720 s.NumTemporaries * sizeof(struct register_info));
721 memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
722
723 rc_recompute_ips(s.C);
724
725 c->AllocateHwInputs(c, &alloc_input_simple, &s);
726 if (*do_full_regalloc) {
727 do_advanced_regalloc(&s);
728 } else {
729 s.Simple = 1;
730 do_regalloc_inputs_only(&s);
731 }
732
733 /* Rewrite inputs and if we are doing the simple allocation, rewrite
734 * temporaries too. */
735 for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
736 inst != &s.C->Program.Instructions;
737 inst = inst->Next) {
738 rc_remap_registers(inst, &remap_register, &s);
739 }
740 }