r300/compiler: make lowering passes possibly use up to two less temps
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_pair_schedule.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_pair.h"
29
30 #include <stdio.h>
31
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
35
36
/* Compile-time switch for the scheduler's debug trace: 0 = silent,
 * non-zero = DBG() messages are printed. */
#define VERBOSE 0

/* printf-style debug helper. The arguments are handed to fprintf(stderr, ...)
 * only when VERBOSE is non-zero; the do/while(0) wrapper makes the macro
 * usable as a single statement. */
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
40
/**
 * Scheduler bookkeeping attached to one instruction of the block that is
 * being scheduled.
 */
struct schedule_instruction {
	/** The program instruction this record describes. */
	struct rc_instruction * Instruction;

	/** Next instruction in the linked list of ready instructions. */
	struct schedule_instruction *NextReady;

	/** Values that this instruction reads and writes */
	/* scan_write() records at most 4 entries and scan_read() at most 12;
	 * both report an error through rc_error() instead of overflowing. */
	struct reg_value * WriteValues[4];
	struct reg_value * ReadValues[12];
	unsigned int NumWriteValues:3;
	unsigned int NumReadValues:4;

	/**
	 * Number of (read and write) dependencies that must be resolved before
	 * this instruction can be scheduled.
	 */
	unsigned int NumDependencies:5;

	/** List of all readers (see rc_get_readers() for the definition of
	 * "all readers"), even those outside the basic block this instruction
	 * lives in. */
	struct rc_reader_data GlobalReaders;
};
64
65
/**
 * Used to keep track of which instructions read a value.
 *
 * One node of the singly linked list hanging off reg_value::Readers.
 */
struct reg_value_reader {
	/** Instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Next node in the same value's reader list. */
	struct reg_value_reader *Next;
};
73
/**
 * Used to keep track which values are stored in each component of a
 * RC_FILE_TEMPORARY.
 */
struct reg_value {
	/** Instruction that produces this value. It stays NULL for a value
	 * that is first seen through a read (scan_read() creates such values
	 * zero-filled). */
	struct schedule_instruction * Writer;

	/**
	 * Unordered linked list of instructions that read from this value.
	 * When this value becomes available (its writer is committed), we
	 * decrease all readers' dependency count.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is decremented each time
	 * a reader of the value is committed.
	 * When the reader count reaches zero, the dependency count
	 * of the instruction writing \ref Next is decremented.
	 */
	unsigned int NumReaders;

	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
};
98
/**
 * Scheduler state of one temporary register: one value slot per channel.
 * get_reg_valuep() indexes Values[] with the channel number.
 */
struct register_state {
	struct reg_value * Values[4];
};
102
103 struct remap_reg {
104 struct rc_instruciont * Inst;
105 unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
106 unsigned int OldSwizzle:3;
107 unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
108 unsigned int NewSwizzle:3;
109 unsigned int OnlyTexReads:1;
110 struct remap_reg * Next;
111 };
112
/**
 * Working state of the scheduler for the block currently being scheduled.
 */
struct schedule_state {
	/** Compiler context; used for the memory pool and error reporting. */
	struct radeon_compiler * C;
	/** Instruction currently being scanned by scan_read()/scan_write(). */
	struct schedule_instruction * Current;

	/** Per-register value tracking, indexed by temporary register index. */
	struct register_state Temporary[RC_REGISTER_MAX_INDEX];

	/**
	 * Linked lists of instructions that can be scheduled right now,
	 * based on which ALU/TEX resources they require.
	 */
	/*@{*/
	struct schedule_instruction *ReadyFullALU;
	struct schedule_instruction *ReadyRGB;
	struct schedule_instruction *ReadyAlpha;
	struct schedule_instruction *ReadyTEX;
	/*@}*/
};
130
131 static struct reg_value ** get_reg_valuep(struct schedule_state * s,
132 rc_register_file file, unsigned int index, unsigned int chan)
133 {
134 if (file != RC_FILE_TEMPORARY)
135 return 0;
136
137 if (index >= RC_REGISTER_MAX_INDEX) {
138 rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
139 return 0;
140 }
141
142 return &s->Temporary[index].Values[chan];
143 }
144
145 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
146 {
147 inst->NextReady = *list;
148 *list = inst;
149 }
150
151 static void add_inst_to_list_end(struct schedule_instruction ** list,
152 struct schedule_instruction * inst)
153 {
154 if(!*list){
155 *list = inst;
156 }else{
157 struct schedule_instruction * temp = *list;
158 while(temp->NextReady){
159 temp = temp->NextReady;
160 }
161 temp->NextReady = inst;
162 }
163 }
164
165 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
166 {
167 DBG("%i is now ready\n", sinst->Instruction->IP);
168
169 /* Adding Ready TEX instructions to the end of the "Ready List" helps
170 * us emit TEX instructions in blocks without losing our place. */
171 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
172 add_inst_to_list_end(&s->ReadyTEX, sinst);
173 else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
174 add_inst_to_list(&s->ReadyRGB, sinst);
175 else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
176 add_inst_to_list(&s->ReadyAlpha, sinst);
177 else
178 add_inst_to_list(&s->ReadyFullALU, sinst);
179 }
180
181 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
182 {
183 assert(sinst->NumDependencies > 0);
184 sinst->NumDependencies--;
185 if (!sinst->NumDependencies)
186 instruction_ready(s, sinst);
187 }
188
189 /**
190 * This function decreases the dependencies of the next instruction that
191 * wants to write to each of sinst's read values.
192 */
193 static void commit_update_reads(struct schedule_state * s,
194 struct schedule_instruction * sinst){
195 unsigned int i;
196 for(i = 0; i < sinst->NumReadValues; ++i) {
197 struct reg_value * v = sinst->ReadValues[i];
198 assert(v->NumReaders > 0);
199 v->NumReaders--;
200 if (!v->NumReaders) {
201 if (v->Next)
202 decrease_dependencies(s, v->Next->Writer);
203 }
204 }
205 }
206
207 static void commit_update_writes(struct schedule_state * s,
208 struct schedule_instruction * sinst){
209 unsigned int i;
210 for(i = 0; i < sinst->NumWriteValues; ++i) {
211 struct reg_value * v = sinst->WriteValues[i];
212 if (v->NumReaders) {
213 for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
214 decrease_dependencies(s, r->Reader);
215 }
216 } else {
217 /* This happens in instruction sequences of the type
218 * OP r.x, ...;
219 * OP r.x, r.x, ...;
220 * See also the subtlety in how instructions that both
221 * read and write the same register are scanned.
222 */
223 if (v->Next)
224 decrease_dependencies(s, v->Next->Writer);
225 }
226 }
227 }
228
229 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
230 {
231 DBG("%i: commit\n", sinst->Instruction->IP);
232
233 commit_update_reads(s, sinst);
234
235 commit_update_writes(s, sinst);
236 }
237
238 /**
239 * Emit all ready texture instructions in a single block.
240 *
241 * Emit as a single block to (hopefully) sample many textures in parallel,
242 * and to avoid hardware indirections on R300.
243 */
244 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
245 {
246 struct schedule_instruction *readytex;
247 struct rc_instruction * inst_begin;
248
249 assert(s->ReadyTEX);
250
251 /* Node marker for R300 */
252 inst_begin = rc_insert_new_instruction(s->C, before->Prev);
253 inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
254
255 /* Link texture instructions back in */
256 readytex = s->ReadyTEX;
257 while(readytex) {
258 rc_insert_instruction(before->Prev, readytex->Instruction);
259 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
260
261 /* All of the TEX instructions in the same TEX block have
262 * their source registers read from before any of the
263 * instructions in that block write to their destination
264 * registers. This means that when we commit a TEX
265 * instruction, any other TEX instruction that wants to write
266 * to one of the committed instruction's source register can be
267 * marked as ready and should be emitted in the same TEX
268 * block. This prevents the following sequence from being
269 * emitted in two different TEX blocks:
270 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
271 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
272 */
273 commit_update_reads(s, readytex);
274 readytex = readytex->NextReady;
275 }
276 readytex = s->ReadyTEX;
277 s->ReadyTEX = 0;
278 while(readytex){
279 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
280 commit_update_writes(s, readytex);
281 readytex = readytex->NextReady;
282 }
283 }
284
/* This is a helper function for destructive_merge_instructions().  It helps
 * merge presubtract sources from two instructions and makes sure the
 * presubtract sources end up in the correct spot.  This function assumes that
 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
 * but no scalar instruction (alpha).
 *
 * @param dst_full instruction being merged into; modified in place, also
 *                 on failure (the caller keeps a backup).
 * @param src      sub-instruction (passed by value) whose presubtract
 *                 source registers are to be merged.
 * @param type     RC_SOURCE_RGB or RC_SOURCE_ALPHA: selects which source
 *                 bank of dst_full is used.
 * @return 0 if merging the presubtract sources fails.
 * @return 1 if merging the presubtract sources succeeds.
 */
static int merge_presub_sources(
	struct rc_pair_instruction * dst_full,
	struct rc_pair_sub_instruction src,
	unsigned int type)
{
	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
	struct rc_pair_sub_instruction * dst_sub;
	const struct rc_opcode_info * info;

	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);

	/* Pick the destination source bank matching the requested type. */
	switch(type) {
	case RC_SOURCE_RGB:
		is_rgb = 1;
		is_alpha = 0;
		dst_sub = &dst_full->RGB;
		break;
	case RC_SOURCE_ALPHA:
		is_rgb = 0;
		is_alpha = 1;
		dst_sub = &dst_full->Alpha;
		break;
	default:
		assert(0);
		return 0;
	}

	/* Only the RGB opcode has arguments that may need re-pointing, since
	 * the alpha half of dst_full is a NOP (asserted above). */
	info = rc_get_opcode_info(dst_full->RGB.Opcode);

	/* The destination already uses its presubtract slot: cannot merge. */
	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
		return 0;

	/* Number of regular source slots the presubtract operation reads. */
	srcp_regs = rc_presubtract_src_reg_count(
				src.Src[RC_PAIR_PRESUB_SRC].Index);
	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
		unsigned int arg;
		int free_source;
		/* Set when the displaced source was moved to a new slot
		 * instead of being swapped; suppresses the reverse re-pointing
		 * in the shuffle loop below. */
		unsigned int one_way = 0;
		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
		struct rc_pair_instruction_source temp;

		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
							srcp.File, srcp.Index);

		/* If free_source < 0 then there are no free source
		 * slots. */
		if (free_source < 0)
			return 0;

		/* Move the presubtract register into slot srcp_src, keeping
		 * the slot's previous content in temp. */
		temp = dst_sub->Src[srcp_src];
		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];

		/* srcp needs src0 and src1 to be the same */
		if (free_source < srcp_src) {
			/* Nothing was displaced: done with this source. */
			if (!temp.Used)
				continue;
			/* Find a new home for the displaced source. */
			free_source = rc_pair_alloc_source(dst_full, is_rgb,
					is_alpha, temp.File, temp.Index);
			if (free_source < 0)
				return 0;
			one_way = 1;
		} else {
			/* Plain swap of the two slots. */
			dst_sub->Src[free_source] = temp;
		}

		/* If free_source == srcp_src, then the presubtract
		 * source is already in the correct place. */
		if (free_source == srcp_src)
			continue;

		/* Shuffle the sources, so we can put the
		 * presubtract source in the correct place. */
		for(arg = 0; arg < info->NumSrcRegs; arg++) {
			/* If this arg does not read from a source of the
			 * requested type, do nothing. */
			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle,
								3) & type)) {
				continue;
			}

			/* Args that used the displaced slot follow it. */
			if (dst_full->RGB.Arg[arg].Source == srcp_src)
				dst_full->RGB.Arg[arg].Source = free_source;
			/* We need to do this just in case register
			 * is one of the sources already, but in the
			 * wrong spot. */
			else if(dst_full->RGB.Arg[arg].Source == free_source
							&& !one_way) {
				dst_full->RGB.Arg[arg].Source = srcp_src;
			}
		}
	}
	return 1;
}
386
387
388 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
389 static int destructive_merge_instructions(
390 struct rc_pair_instruction * rgb,
391 struct rc_pair_instruction * alpha)
392 {
393 const struct rc_opcode_info * opcode;
394
395 assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
396 assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
397
398 /* Presubtract registers need to be merged first so that registers
399 * needed by the presubtract operation can be placed in src0 and/or
400 * src1. */
401
402 /* Merge the rgb presubtract registers. */
403 if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
404 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
405 return 0;
406 }
407 }
408 /* Merge the alpha presubtract registers */
409 if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
410 if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){
411 return 0;
412 }
413 }
414
415 /* Copy alpha args into rgb */
416 opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
417
418 for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
419 unsigned int srcrgb = 0;
420 unsigned int srcalpha = 0;
421 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
422 rc_register_file file = 0;
423 unsigned int index = 0;
424 int source;
425
426 if (alpha->Alpha.Arg[arg].Swizzle < 3) {
427 srcrgb = 1;
428 file = alpha->RGB.Src[oldsrc].File;
429 index = alpha->RGB.Src[oldsrc].Index;
430 } else if (alpha->Alpha.Arg[arg].Swizzle < 4) {
431 srcalpha = 1;
432 file = alpha->Alpha.Src[oldsrc].File;
433 index = alpha->Alpha.Src[oldsrc].Index;
434 }
435
436 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
437 if (source < 0)
438 return 0;
439
440 rgb->Alpha.Arg[arg].Source = source;
441 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
442 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
443 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
444 }
445
446 /* Copy alpha opcode into rgb */
447 rgb->Alpha.Opcode = alpha->Alpha.Opcode;
448 rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
449 rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
450 rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
451 rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
452 rgb->Alpha.Saturate = alpha->Alpha.Saturate;
453
454 /* Merge ALU result writing */
455 if (alpha->WriteALUResult) {
456 if (rgb->WriteALUResult)
457 return 0;
458
459 rgb->WriteALUResult = alpha->WriteALUResult;
460 rgb->ALUResultCompare = alpha->ALUResultCompare;
461 }
462
463 return 1;
464 }
465
466 /**
467 * Try to merge the given instructions into the rgb instructions.
468 *
469 * Return true on success; on failure, return false, and keep
470 * the instructions untouched.
471 */
472 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
473 {
474 struct rc_pair_instruction backup;
475
476 /*Instructions can't write output registers and ALU result at the
477 * same time. */
478 if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
479 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
480 return 0;
481 }
482 memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
483
484 if (destructive_merge_instructions(rgb, alpha))
485 return 1;
486
487 memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
488 return 0;
489 }
490
491 static void presub_nop(struct rc_instruction * emitted) {
492 int prev_rgb_index, prev_alpha_index, i, num_src;
493
494 /* We don't need a nop if the previous instruction is a TEX. */
495 if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
496 return;
497 }
498 if (emitted->Prev->U.P.RGB.WriteMask)
499 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
500 else
501 prev_rgb_index = -1;
502 if (emitted->Prev->U.P.Alpha.WriteMask)
503 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
504 else
505 prev_alpha_index = 1;
506
507 /* Check the previous rgb instruction */
508 if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
509 num_src = rc_presubtract_src_reg_count(
510 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
511 for (i = 0; i < num_src; i++) {
512 unsigned int index = emitted->U.P.RGB.Src[i].Index;
513 if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
514 && (index == prev_rgb_index
515 || index == prev_alpha_index)) {
516 emitted->Prev->U.P.Nop = 1;
517 return;
518 }
519 }
520 }
521
522 /* Check the previous alpha instruction. */
523 if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
524 return;
525
526 num_src = rc_presubtract_src_reg_count(
527 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
528 for (i = 0; i < num_src; i++) {
529 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
530 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
531 && (index == prev_rgb_index || index == prev_alpha_index)) {
532 emitted->Prev->U.P.Nop = 1;
533 return;
534 }
535 }
536 }
537
538 static void rgb_to_alpha_remap (
539 struct rc_instruction * inst,
540 struct rc_pair_instruction_arg * arg,
541 rc_register_file old_file,
542 rc_swizzle old_swz,
543 unsigned int new_index)
544 {
545 int new_src_index;
546 unsigned int i;
547 struct rc_pair_instruction_source * old_src =
548 rc_pair_get_src(&inst->U.P, arg);
549 if (!old_src) {
550 return;
551 }
552
553 for (i = 0; i < 3; i++) {
554 if (get_swz(arg->Swizzle, i) == old_swz) {
555 SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
556 }
557 }
558 memset(old_src, 0, sizeof(struct rc_pair_instruction_source));
559 new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
560 old_file, new_index);
561 /* This conversion is not possible, we must have made a mistake in
562 * is_rgb_to_alpha_possible. */
563 if (new_src_index < 0) {
564 assert(0);
565 return;
566 }
567
568 arg->Source = new_src_index;
569 }
570
571 static int can_remap(unsigned int opcode)
572 {
573 switch(opcode) {
574 case RC_OPCODE_DDX:
575 case RC_OPCODE_DDY:
576 return 0;
577 default:
578 return 1;
579 }
580 }
581
582 static int can_convert_opcode_to_alpha(unsigned int opcode)
583 {
584 switch(opcode) {
585 case RC_OPCODE_DDX:
586 case RC_OPCODE_DDY:
587 case RC_OPCODE_DP2:
588 case RC_OPCODE_DP3:
589 case RC_OPCODE_DP4:
590 case RC_OPCODE_DPH:
591 return 0;
592 default:
593 return 1;
594 }
595 }
596
597 static void is_rgb_to_alpha_possible(
598 void * userdata,
599 struct rc_instruction * inst,
600 struct rc_pair_instruction_arg * arg,
601 struct rc_pair_instruction_source * src)
602 {
603 unsigned int chan_count = 0;
604 unsigned int alpha_sources = 0;
605 unsigned int i;
606 struct rc_reader_data * reader_data = userdata;
607
608 if (!can_remap(inst->U.P.RGB.Opcode)
609 || !can_remap(inst->U.P.Alpha.Opcode)) {
610 reader_data->Abort = 1;
611 return;
612 }
613
614 if (!src)
615 return;
616
617 /* XXX There are some cases where we can still do the conversion if
618 * a reader reads from a presubtract source, but for now we'll prevent
619 * it. */
620 if (arg->Source == RC_PAIR_PRESUB_SRC) {
621 reader_data->Abort = 1;
622 return;
623 }
624
625 /* Make sure the source only reads from one component.
626 * XXX We should allow the source to read from the same component twice.
627 * XXX If the index we will be converting to is the same as the
628 * current index, then it is OK to read from more than one component.
629 */
630 for (i = 0; i < 3; i++) {
631 rc_swizzle swz = get_swz(arg->Swizzle, i);
632 switch(swz) {
633 case RC_SWIZZLE_X:
634 case RC_SWIZZLE_Y:
635 case RC_SWIZZLE_Z:
636 case RC_SWIZZLE_W:
637 chan_count++;
638 break;
639 default:
640 break;
641 }
642 }
643 if (chan_count > 1) {
644 reader_data->Abort = 1;
645 return;
646 }
647
648 /* Make sure there are enough alpha sources.
649 * XXX If we know what register all the readers are going
650 * to be remapped to, then in some situations we can still do
651 * the subsitution, even if all 3 alpha sources are being used.*/
652 for (i = 0; i < 3; i++) {
653 if (inst->U.P.Alpha.Src[i].Used) {
654 alpha_sources++;
655 }
656 }
657 if (alpha_sources > 2) {
658 reader_data->Abort = 1;
659 return;
660 }
661 }
662
663 static int convert_rgb_to_alpha(
664 struct schedule_state * s,
665 struct schedule_instruction * sched_inst)
666 {
667 struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
668 unsigned int old_mask = pair_inst->RGB.WriteMask;
669 unsigned int old_swz = rc_mask_to_swizzle(old_mask);
670 const struct rc_opcode_info * info =
671 rc_get_opcode_info(pair_inst->RGB.Opcode);
672 int new_index = -1;
673 unsigned int i;
674
675 if (sched_inst->GlobalReaders.Abort)
676 return 0;
677
678 if (!pair_inst->RGB.WriteMask)
679 return 0;
680
681 if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
682 || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
683 return 0;
684 }
685
686 assert(sched_inst->NumWriteValues == 1);
687
688 if (!sched_inst->WriteValues[0]) {
689 assert(0);
690 return 0;
691 }
692
693 /* We start at the old index, because if we can reuse the same
694 * register and just change the swizzle then it is more likely we
695 * will be able to convert all the readers. */
696 for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
697 struct reg_value ** new_regvalp = get_reg_valuep(
698 s, RC_FILE_TEMPORARY, i, 3);
699 if (!*new_regvalp) {
700 struct reg_value ** old_regvalp =
701 get_reg_valuep(s,
702 RC_FILE_TEMPORARY,
703 pair_inst->RGB.DestIndex,
704 rc_mask_to_swizzle(old_mask));
705 new_index = i;
706 *new_regvalp = *old_regvalp;
707 *old_regvalp = NULL;
708 new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
709 break;
710 }
711 }
712 if (new_index < 0) {
713 return 0;
714 }
715
716 pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
717 pair_inst->Alpha.DestIndex = new_index;
718 pair_inst->Alpha.WriteMask = 1;
719 pair_inst->Alpha.Target = pair_inst->RGB.Target;
720 pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
721 pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
722 pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
723 memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
724 sizeof(pair_inst->Alpha.Arg));
725 /* Move the swizzles into the first chan */
726 for (i = 0; i < info->NumSrcRegs; i++) {
727 unsigned int j;
728 for (j = 0; j < 3; j++) {
729 unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
730 if (swz != RC_SWIZZLE_UNUSED) {
731 pair_inst->Alpha.Arg[i].Swizzle = swz;
732 break;
733 }
734 }
735 }
736 pair_inst->RGB.Opcode = RC_OPCODE_NOP;
737 pair_inst->RGB.DestIndex = 0;
738 pair_inst->RGB.WriteMask = 0;
739 pair_inst->RGB.Target = 0;
740 pair_inst->RGB.OutputWriteMask = 0;
741 pair_inst->RGB.DepthWriteMask = 0;
742 pair_inst->RGB.Saturate = 0;
743 memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
744
745 for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
746 struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
747 rgb_to_alpha_remap(reader.Inst, reader.U.Arg,
748 RC_FILE_TEMPORARY, old_swz, new_index);
749 }
750 return 1;
751 }
752
753 /**
754 * Find a good ALU instruction or pair of ALU instruction and emit it.
755 *
756 * Prefer emitting full ALU instructions, so that when we reach a point
757 * where no full ALU instruction can be emitted, we have more candidates
758 * for RGB/Alpha pairing.
759 */
760 static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
761 {
762 struct schedule_instruction * sinst;
763
764 if (s->ReadyFullALU) {
765 sinst = s->ReadyFullALU;
766 s->ReadyFullALU = s->ReadyFullALU->NextReady;
767 rc_insert_instruction(before->Prev, sinst->Instruction);
768 commit_alu_instruction(s, sinst);
769 } else {
770 struct schedule_instruction **prgb;
771 struct schedule_instruction **palpha;
772 struct schedule_instruction *prev;
773 pair:
774 /* Some pairings might fail because they require too
775 * many source slots; try all possible pairings if necessary */
776 for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
777 for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
778 struct schedule_instruction * psirgb = *prgb;
779 struct schedule_instruction * psialpha = *palpha;
780
781 if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
782 continue;
783
784 *prgb = (*prgb)->NextReady;
785 *palpha = (*palpha)->NextReady;
786 rc_insert_instruction(before->Prev, psirgb->Instruction);
787 commit_alu_instruction(s, psirgb);
788 commit_alu_instruction(s, psialpha);
789 goto success;
790 }
791 }
792 prev = NULL;
793 /* No success in pairing, now try to convert one of the RGB
794 * instructions to an Alpha so we can pair it with another RGB.
795 */
796 if (s->ReadyRGB && s->ReadyRGB->NextReady) {
797 for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
798 if ((*prgb)->NumWriteValues == 1) {
799 struct schedule_instruction * prgb_next;
800 if (!convert_rgb_to_alpha(s, *prgb))
801 goto cont_loop;
802 prgb_next = (*prgb)->NextReady;
803 /* Add instruction to the Alpha ready list. */
804 (*prgb)->NextReady = s->ReadyAlpha;
805 s->ReadyAlpha = *prgb;
806 /* Remove instruction from the RGB ready list.*/
807 if (prev)
808 prev->NextReady = prgb_next;
809 else
810 s->ReadyRGB = prgb_next;
811 goto pair;
812 }
813 cont_loop:
814 prev = *prgb;
815 }
816 }
817 /* Still no success in pairing, just take the first RGB
818 * or alpha instruction. */
819 if (s->ReadyRGB) {
820 sinst = s->ReadyRGB;
821 s->ReadyRGB = s->ReadyRGB->NextReady;
822 } else if (s->ReadyAlpha) {
823 sinst = s->ReadyAlpha;
824 s->ReadyAlpha = s->ReadyAlpha->NextReady;
825 } else {
826 /*XXX Something real bad has happened. */
827 assert(0);
828 }
829
830 rc_insert_instruction(before->Prev, sinst->Instruction);
831 commit_alu_instruction(s, sinst);
832 success: ;
833 }
834 /* If the instruction we just emitted uses a presubtract value, and
835 * the presubtract sources were written by the previous intstruction,
836 * the previous instruction needs a nop. */
837 presub_nop(before->Prev);
838 }
839
840 static void scan_read(void * data, struct rc_instruction * inst,
841 rc_register_file file, unsigned int index, unsigned int chan)
842 {
843 struct schedule_state * s = data;
844 struct reg_value ** v = get_reg_valuep(s, file, index, chan);
845 struct reg_value_reader * reader;
846
847 if (!v)
848 return;
849
850 if (*v && (*v)->Writer == s->Current) {
851 /* The instruction reads and writes to a register component.
852 * In this case, we only want to increment dependencies by one.
853 */
854 return;
855 }
856
857 DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
858
859 reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
860 reader->Reader = s->Current;
861 if (!*v) {
862 /* In this situation, the instruction reads from a register
863 * that hasn't been written to or read from in the current
864 * block. */
865 *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
866 memset(*v, 0, sizeof(struct reg_value));
867 (*v)->Readers = reader;
868 } else {
869 reader->Next = (*v)->Readers;
870 (*v)->Readers = reader;
871 /* Only update the current instruction's dependencies if the
872 * register it reads from has been written to in this block. */
873 if ((*v)->Writer) {
874 s->Current->NumDependencies++;
875 }
876 }
877 (*v)->NumReaders++;
878
879 if (s->Current->NumReadValues >= 12) {
880 rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
881 } else {
882 s->Current->ReadValues[s->Current->NumReadValues++] = *v;
883 }
884 }
885
886 static void scan_write(void * data, struct rc_instruction * inst,
887 rc_register_file file, unsigned int index, unsigned int chan)
888 {
889 struct schedule_state * s = data;
890 struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
891 struct reg_value * newv;
892
893 if (!pv)
894 return;
895
896 DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
897
898 newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
899 memset(newv, 0, sizeof(*newv));
900
901 newv->Writer = s->Current;
902
903 if (*pv) {
904 (*pv)->Next = newv;
905 s->Current->NumDependencies++;
906 }
907
908 *pv = newv;
909
910 if (s->Current->NumWriteValues >= 4) {
911 rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
912 } else {
913 s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
914 }
915 }
916
917 static void is_rgb_to_alpha_possible_normal(
918 void * userdata,
919 struct rc_instruction * inst,
920 struct rc_src_register * src)
921 {
922 struct rc_reader_data * reader_data = userdata;
923 reader_data->Abort = 1;
924
925 }
926
927 static void schedule_block(struct r300_fragment_program_compiler * c,
928 struct rc_instruction * begin, struct rc_instruction * end)
929 {
930 struct schedule_state s;
931 unsigned int ip;
932
933 memset(&s, 0, sizeof(s));
934 s.C = &c->Base;
935
936 /* Scan instructions for data dependencies */
937 ip = 0;
938 for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
939 s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
940 memset(s.Current, 0, sizeof(struct schedule_instruction));
941
942 s.Current->Instruction = inst;
943 inst->IP = ip++;
944
945 DBG("%i: Scanning\n", inst->IP);
946
947 /* The order of things here is subtle and maybe slightly
948 * counter-intuitive, to account for the case where an
949 * instruction writes to the same register as it reads
950 * from. */
951 rc_for_all_writes_chan(inst, &scan_write, &s);
952 rc_for_all_reads_chan(inst, &scan_read, &s);
953
954 DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
955
956 if (!s.Current->NumDependencies)
957 instruction_ready(&s, s.Current);
958
959 /* Get global readers for possible RGB->Alpha conversion. */
960 rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
961 is_rgb_to_alpha_possible_normal,
962 is_rgb_to_alpha_possible, NULL);
963 }
964
965 /* Temporarily unlink all instructions */
966 begin->Prev->Next = end;
967 end->Prev = begin->Prev;
968
969 /* Schedule instructions back */
970 while(!s.C->Error &&
971 (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
972 if (s.ReadyTEX)
973 emit_all_tex(&s, end);
974
975 while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
976 emit_one_alu(&s, end);
977 }
978 }
979
980 static int is_controlflow(struct rc_instruction * inst)
981 {
982 if (inst->Type == RC_INSTRUCTION_NORMAL) {
983 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
984 return opcode->IsFlowControl;
985 }
986 return 0;
987 }
988
989 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
990 {
991 struct schedule_state s;
992
993 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
994 struct rc_instruction * inst = c->Base.Program.Instructions.Next;
995
996 memset(&s, 0, sizeof(s));
997 s.C = &c->Base;
998 while(inst != &c->Base.Program.Instructions) {
999 struct rc_instruction * first;
1000
1001 if (is_controlflow(inst)) {
1002 inst = inst->Next;
1003 continue;
1004 }
1005
1006 first = inst;
1007
1008 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1009 inst = inst->Next;
1010
1011 DBG("Schedule one block\n");
1012 schedule_block(c, first, inst);
1013 }
1014 }