r300g: copy the compiler from r300c
[mesa.git] / src / gallium / drivers / r300 / compiler / radeon_pair_schedule.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_pair.h"
29
30 #include <stdio.h>
31
32 #include "radeon_compiler.h"
33 #include "radeon_compiler_util.h"
34 #include "radeon_dataflow.h"
35
36
/* Compile-time switch for scheduler tracing; non-zero enables DBG output. */
#define VERBOSE 0

/* printf-style trace to stderr; the message is only emitted when VERBOSE
 * is non-zero. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while (0)
40
41 struct schedule_instruction {
42 struct rc_instruction * Instruction;
43
44 /** Next instruction in the linked list of ready instructions. */
45 struct schedule_instruction *NextReady;
46
47 /** Values that this instruction reads and writes */
48 struct reg_value * WriteValues[4];
49 struct reg_value * ReadValues[12];
50 unsigned int NumWriteValues:3;
51 unsigned int NumReadValues:4;
52
53 /**
54 * Number of (read and write) dependencies that must be resolved before
55 * this instruction can be scheduled.
56 */
57 unsigned int NumDependencies:5;
58
59 /** List of all readers (see rc_get_readers() for the definition of
60 * "all readers"), even those outside the basic block this instruction
61 * lives in. */
62 struct rc_reader_data GlobalReaders;
63 };
64
65
/**
 * One node of the singly linked list recording which instructions read
 * a given value.
 */
struct reg_value_reader {
	/** Instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Following node in the list, or a null pointer at the end. */
	struct reg_value_reader *Next;
};
73
/**
 * Describes one value held in a single component of an
 * RC_FILE_TEMPORARY register.
 */
struct reg_value {
	/** Instruction that produces this value. */
	struct schedule_instruction *Writer;

	/**
	 * Unordered linked list of the instructions reading this value.
	 * Once the value becomes available, the dependency count of every
	 * reader on this list is decreased.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of outstanding readers. It is decremented each time a
	 * reader of the value is committed. When the reader count reaches
	 * zero, the dependency count of the instruction writing \ref Next
	 * is decremented.
	 */
	unsigned int NumReaders;

	/** The next value that will be written to the same register. */
	struct reg_value *Next;
};
98
/** Current value of a temporary register, tracked per component. */
struct register_state {
	struct reg_value *Values[4];
};
102
103 struct remap_reg {
104 struct rc_instruciont * Inst;
105 unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
106 unsigned int OldSwizzle:3;
107 unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
108 unsigned int NewSwizzle:3;
109 unsigned int OnlyTexReads:1;
110 struct remap_reg * Next;
111 };
112
113 struct schedule_state {
114 struct radeon_compiler * C;
115 struct schedule_instruction * Current;
116
117 struct register_state Temporary[RC_REGISTER_MAX_INDEX];
118
119 /**
120 * Linked lists of instructions that can be scheduled right now,
121 * based on which ALU/TEX resources they require.
122 */
123 /*@{*/
124 struct schedule_instruction *ReadyFullALU;
125 struct schedule_instruction *ReadyRGB;
126 struct schedule_instruction *ReadyAlpha;
127 struct schedule_instruction *ReadyTEX;
128 /*@}*/
129 };
130
131 static struct reg_value ** get_reg_valuep(struct schedule_state * s,
132 rc_register_file file, unsigned int index, unsigned int chan)
133 {
134 if (file != RC_FILE_TEMPORARY)
135 return 0;
136
137 if (index >= RC_REGISTER_MAX_INDEX) {
138 rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
139 return 0;
140 }
141
142 return &s->Temporary[index].Values[chan];
143 }
144
145 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
146 {
147 inst->NextReady = *list;
148 *list = inst;
149 }
150
151 static void add_inst_to_list_end(struct schedule_instruction ** list,
152 struct schedule_instruction * inst)
153 {
154 if(!*list){
155 *list = inst;
156 }else{
157 struct schedule_instruction * temp = *list;
158 while(temp->NextReady){
159 temp = temp->NextReady;
160 }
161 temp->NextReady = inst;
162 }
163 }
164
165 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
166 {
167 DBG("%i is now ready\n", sinst->Instruction->IP);
168
169 /* Adding Ready TEX instructions to the end of the "Ready List" helps
170 * us emit TEX instructions in blocks without losing our place. */
171 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
172 add_inst_to_list_end(&s->ReadyTEX, sinst);
173 else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
174 add_inst_to_list(&s->ReadyRGB, sinst);
175 else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
176 add_inst_to_list(&s->ReadyAlpha, sinst);
177 else
178 add_inst_to_list(&s->ReadyFullALU, sinst);
179 }
180
181 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
182 {
183 assert(sinst->NumDependencies > 0);
184 sinst->NumDependencies--;
185 if (!sinst->NumDependencies)
186 instruction_ready(s, sinst);
187 }
188
189 /**
190 * This function decreases the dependencies of the next instruction that
191 * wants to write to each of sinst's read values.
192 */
193 static void commit_update_reads(struct schedule_state * s,
194 struct schedule_instruction * sinst){
195 unsigned int i;
196 for(i = 0; i < sinst->NumReadValues; ++i) {
197 struct reg_value * v = sinst->ReadValues[i];
198 assert(v->NumReaders > 0);
199 v->NumReaders--;
200 if (!v->NumReaders) {
201 if (v->Next)
202 decrease_dependencies(s, v->Next->Writer);
203 }
204 }
205 }
206
207 static void commit_update_writes(struct schedule_state * s,
208 struct schedule_instruction * sinst){
209 unsigned int i;
210 for(i = 0; i < sinst->NumWriteValues; ++i) {
211 struct reg_value * v = sinst->WriteValues[i];
212 if (v->NumReaders) {
213 for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
214 decrease_dependencies(s, r->Reader);
215 }
216 } else {
217 /* This happens in instruction sequences of the type
218 * OP r.x, ...;
219 * OP r.x, r.x, ...;
220 * See also the subtlety in how instructions that both
221 * read and write the same register are scanned.
222 */
223 if (v->Next)
224 decrease_dependencies(s, v->Next->Writer);
225 }
226 }
227 }
228
229 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
230 {
231 DBG("%i: commit\n", sinst->Instruction->IP);
232
233 commit_update_reads(s, sinst);
234
235 commit_update_writes(s, sinst);
236 }
237
238 /**
239 * Emit all ready texture instructions in a single block.
240 *
241 * Emit as a single block to (hopefully) sample many textures in parallel,
242 * and to avoid hardware indirections on R300.
243 */
244 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
245 {
246 struct schedule_instruction *readytex;
247 struct rc_instruction * inst_begin;
248
249 assert(s->ReadyTEX);
250
251 /* Node marker for R300 */
252 inst_begin = rc_insert_new_instruction(s->C, before->Prev);
253 inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
254
255 /* Link texture instructions back in */
256 readytex = s->ReadyTEX;
257 while(readytex) {
258 rc_insert_instruction(before->Prev, readytex->Instruction);
259 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
260
261 /* All of the TEX instructions in the same TEX block have
262 * their source registers read from before any of the
263 * instructions in that block write to their destination
264 * registers. This means that when we commit a TEX
265 * instruction, any other TEX instruction that wants to write
266 * to one of the committed instruction's source register can be
267 * marked as ready and should be emitted in the same TEX
268 * block. This prevents the following sequence from being
269 * emitted in two different TEX blocks:
270 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
271 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
272 */
273 commit_update_reads(s, readytex);
274 readytex = readytex->NextReady;
275 }
276 readytex = s->ReadyTEX;
277 s->ReadyTEX = 0;
278 while(readytex){
279 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
280 commit_update_writes(s, readytex);
281 readytex = readytex->NextReady;
282 }
283 }
284
285 /* This is a helper function for destructive_merge_instructions(). It helps
286 * merge presubtract sources from two instructions and makes sure the
287 * presubtract sources end up in the correct spot. This function assumes that
288 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
289 * but no scalar instruction (alpha).
290 * @return 0 if merging the presubtract sources fails.
291 * @retrun 1 if merging the presubtract sources succeeds.
292 */
293 static int merge_presub_sources(
294 struct rc_pair_instruction * dst_full,
295 struct rc_pair_sub_instruction src,
296 unsigned int type)
297 {
298 unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
299 struct rc_pair_sub_instruction * dst_sub;
300 const struct rc_opcode_info * info;
301
302 assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
303
304 switch(type) {
305 case RC_SOURCE_RGB:
306 is_rgb = 1;
307 is_alpha = 0;
308 dst_sub = &dst_full->RGB;
309 break;
310 case RC_SOURCE_ALPHA:
311 is_rgb = 0;
312 is_alpha = 1;
313 dst_sub = &dst_full->Alpha;
314 break;
315 default:
316 assert(0);
317 return 0;
318 }
319
320 info = rc_get_opcode_info(dst_full->RGB.Opcode);
321
322 if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
323 return 0;
324
325 srcp_regs = rc_presubtract_src_reg_count(
326 src.Src[RC_PAIR_PRESUB_SRC].Index);
327 for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
328 unsigned int arg;
329 int free_source;
330 unsigned int one_way = 0;
331 struct rc_pair_instruction_source srcp = src.Src[srcp_src];
332 struct rc_pair_instruction_source temp;
333
334 free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
335 srcp.File, srcp.Index);
336
337 /* If free_source < 0 then there are no free source
338 * slots. */
339 if (free_source < 0)
340 return 0;
341
342 temp = dst_sub->Src[srcp_src];
343 dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
344
345 /* srcp needs src0 and src1 to be the same */
346 if (free_source < srcp_src) {
347 if (!temp.Used)
348 continue;
349 free_source = rc_pair_alloc_source(dst_full, is_rgb,
350 is_alpha, temp.File, temp.Index);
351 if (free_source < 0)
352 return 0;
353 one_way = 1;
354 } else {
355 dst_sub->Src[free_source] = temp;
356 }
357
358 /* If free_source == srcp_src, then the presubtract
359 * source is already in the correct place. */
360 if (free_source == srcp_src)
361 continue;
362
363 /* Shuffle the sources, so we can put the
364 * presubtract source in the correct place. */
365 for(arg = 0; arg < info->NumSrcRegs; arg++) {
366 /*If this arg does not read from an rgb source,
367 * do nothing. */
368 if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
369 & type)) {
370 continue;
371 }
372
373 if (dst_full->RGB.Arg[arg].Source == srcp_src)
374 dst_full->RGB.Arg[arg].Source = free_source;
375 /* We need to do this just in case register
376 * is one of the sources already, but in the
377 * wrong spot. */
378 else if(dst_full->RGB.Arg[arg].Source == free_source
379 && !one_way) {
380 dst_full->RGB.Arg[arg].Source = srcp_src;
381 }
382 }
383 }
384 return 1;
385 }
386
387
388 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
389 static int destructive_merge_instructions(
390 struct rc_pair_instruction * rgb,
391 struct rc_pair_instruction * alpha)
392 {
393 const struct rc_opcode_info * opcode;
394
395 assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
396 assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
397
398 /* Presubtract registers need to be merged first so that registers
399 * needed by the presubtract operation can be placed in src0 and/or
400 * src1. */
401
402 /* Merge the rgb presubtract registers. */
403 if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
404 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
405 return 0;
406 }
407 }
408 /* Merge the alpha presubtract registers */
409 if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
410 if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){
411 return 0;
412 }
413 }
414
415 /* Copy alpha args into rgb */
416 opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
417
418 for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
419 unsigned int srcrgb = 0;
420 unsigned int srcalpha = 0;
421 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
422 rc_register_file file = 0;
423 unsigned int index = 0;
424 int source;
425
426 if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
427 srcrgb = 1;
428 file = alpha->RGB.Src[oldsrc].File;
429 index = alpha->RGB.Src[oldsrc].Index;
430 } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
431 srcalpha = 1;
432 file = alpha->Alpha.Src[oldsrc].File;
433 index = alpha->Alpha.Src[oldsrc].Index;
434 }
435
436 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
437 if (source < 0)
438 return 0;
439
440 rgb->Alpha.Arg[arg].Source = source;
441 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
442 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
443 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
444 }
445
446 /* Copy alpha opcode into rgb */
447 rgb->Alpha.Opcode = alpha->Alpha.Opcode;
448 rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
449 rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
450 rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
451 rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
452 rgb->Alpha.Saturate = alpha->Alpha.Saturate;
453
454 /* Merge ALU result writing */
455 if (alpha->WriteALUResult) {
456 if (rgb->WriteALUResult)
457 return 0;
458
459 rgb->WriteALUResult = alpha->WriteALUResult;
460 rgb->ALUResultCompare = alpha->ALUResultCompare;
461 }
462
463 return 1;
464 }
465
466 /**
467 * Try to merge the given instructions into the rgb instructions.
468 *
469 * Return true on success; on failure, return false, and keep
470 * the instructions untouched.
471 */
472 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
473 {
474 struct rc_pair_instruction backup;
475
476 /*Instructions can't write output registers and ALU result at the
477 * same time. */
478 if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
479 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
480 return 0;
481 }
482 memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
483
484 if (destructive_merge_instructions(rgb, alpha))
485 return 1;
486
487 memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
488 return 0;
489 }
490
491 static void presub_nop(struct rc_instruction * emitted) {
492 int prev_rgb_index, prev_alpha_index, i, num_src;
493
494 /* We don't need a nop if the previous instruction is a TEX. */
495 if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
496 return;
497 }
498 if (emitted->Prev->U.P.RGB.WriteMask)
499 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
500 else
501 prev_rgb_index = -1;
502 if (emitted->Prev->U.P.Alpha.WriteMask)
503 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
504 else
505 prev_alpha_index = 1;
506
507 /* Check the previous rgb instruction */
508 if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
509 num_src = rc_presubtract_src_reg_count(
510 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
511 for (i = 0; i < num_src; i++) {
512 unsigned int index = emitted->U.P.RGB.Src[i].Index;
513 if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
514 && (index == prev_rgb_index
515 || index == prev_alpha_index)) {
516 emitted->Prev->U.P.Nop = 1;
517 return;
518 }
519 }
520 }
521
522 /* Check the previous alpha instruction. */
523 if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
524 return;
525
526 num_src = rc_presubtract_src_reg_count(
527 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
528 for (i = 0; i < num_src; i++) {
529 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
530 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
531 && (index == prev_rgb_index || index == prev_alpha_index)) {
532 emitted->Prev->U.P.Nop = 1;
533 return;
534 }
535 }
536 }
537
538 static void rgb_to_alpha_remap (
539 struct rc_instruction * inst,
540 struct rc_pair_instruction_arg * arg,
541 rc_register_file old_file,
542 rc_swizzle old_swz,
543 unsigned int new_index)
544 {
545 int new_src_index;
546 unsigned int i;
547
548 for (i = 0; i < 3; i++) {
549 if (get_swz(arg->Swizzle, i) == old_swz) {
550 SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
551 }
552 }
553 new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
554 old_file, new_index);
555 /* This conversion is not possible, we must have made a mistake in
556 * is_rgb_to_alpha_possible. */
557 if (new_src_index < 0) {
558 assert(0);
559 return;
560 }
561
562 arg->Source = new_src_index;
563 }
564
565 static int can_remap(unsigned int opcode)
566 {
567 switch(opcode) {
568 case RC_OPCODE_DDX:
569 case RC_OPCODE_DDY:
570 return 0;
571 default:
572 return 1;
573 }
574 }
575
576 static int can_convert_opcode_to_alpha(unsigned int opcode)
577 {
578 switch(opcode) {
579 case RC_OPCODE_DDX:
580 case RC_OPCODE_DDY:
581 case RC_OPCODE_DP2:
582 case RC_OPCODE_DP3:
583 case RC_OPCODE_DP4:
584 case RC_OPCODE_DPH:
585 return 0;
586 default:
587 return 1;
588 }
589 }
590
591 static void is_rgb_to_alpha_possible(
592 void * userdata,
593 struct rc_instruction * inst,
594 struct rc_pair_instruction_arg * arg,
595 struct rc_pair_instruction_source * src)
596 {
597 unsigned int chan_count = 0;
598 unsigned int alpha_sources = 0;
599 unsigned int i;
600 struct rc_reader_data * reader_data = userdata;
601
602 if (!can_remap(inst->U.P.RGB.Opcode)
603 || !can_remap(inst->U.P.Alpha.Opcode)) {
604 reader_data->Abort = 1;
605 return;
606 }
607
608 if (!src)
609 return;
610
611 /* XXX There are some cases where we can still do the conversion if
612 * a reader reads from a presubtract source, but for now we'll prevent
613 * it. */
614 if (arg->Source == RC_PAIR_PRESUB_SRC) {
615 reader_data->Abort = 1;
616 return;
617 }
618
619 /* Make sure the source only reads from one component.
620 * XXX We should allow the source to read from the same component twice.
621 * XXX If the index we will be converting to is the same as the
622 * current index, then it is OK to read from more than one component.
623 */
624 for (i = 0; i < 3; i++) {
625 rc_swizzle swz = get_swz(arg->Swizzle, i);
626 switch(swz) {
627 case RC_SWIZZLE_X:
628 case RC_SWIZZLE_Y:
629 case RC_SWIZZLE_Z:
630 case RC_SWIZZLE_W:
631 chan_count++;
632 break;
633 default:
634 break;
635 }
636 }
637 if (chan_count > 1) {
638 reader_data->Abort = 1;
639 return;
640 }
641
642 /* Make sure there are enough alpha sources.
643 * XXX If we know what register all the readers are going
644 * to be remapped to, then in some situations we can still do
645 * the subsitution, even if all 3 alpha sources are being used.*/
646 for (i = 0; i < 3; i++) {
647 if (inst->U.P.Alpha.Src[i].Used) {
648 alpha_sources++;
649 }
650 }
651 if (alpha_sources > 2) {
652 reader_data->Abort = 1;
653 return;
654 }
655 }
656
657 static int convert_rgb_to_alpha(
658 struct schedule_state * s,
659 struct schedule_instruction * sched_inst)
660 {
661 struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
662 unsigned int old_mask = pair_inst->RGB.WriteMask;
663 unsigned int old_swz = rc_mask_to_swizzle(old_mask);
664 const struct rc_opcode_info * info =
665 rc_get_opcode_info(pair_inst->RGB.Opcode);
666 int new_index = -1;
667 unsigned int i;
668
669 if (sched_inst->GlobalReaders.Abort)
670 return 0;
671
672 if (!pair_inst->RGB.WriteMask)
673 return 0;
674
675 if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
676 || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
677 return 0;
678 }
679
680 assert(sched_inst->NumWriteValues == 1);
681
682 if (!sched_inst->WriteValues[0]) {
683 assert(0);
684 return 0;
685 }
686
687 /* We start at the old index, because if we can reuse the same
688 * register and just change the swizzle then it is more likely we
689 * will be able to convert all the readers. */
690 for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
691 struct reg_value ** new_regvalp = get_reg_valuep(
692 s, RC_FILE_TEMPORARY, i, 3);
693 if (!*new_regvalp) {
694 struct reg_value ** old_regvalp =
695 get_reg_valuep(s,
696 RC_FILE_TEMPORARY,
697 pair_inst->RGB.DestIndex,
698 rc_mask_to_swizzle(old_mask));
699 new_index = i;
700 *new_regvalp = *old_regvalp;
701 *old_regvalp = NULL;
702 new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
703 break;
704 }
705 }
706 if (new_index < 0) {
707 return 0;
708 }
709
710 pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
711 pair_inst->Alpha.DestIndex = new_index;
712 pair_inst->Alpha.WriteMask = RC_MASK_W;
713 pair_inst->Alpha.Target = pair_inst->RGB.Target;
714 pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
715 pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
716 pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
717 memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
718 sizeof(pair_inst->Alpha.Arg));
719 /* Move the swizzles into the first chan */
720 for (i = 0; i < info->NumSrcRegs; i++) {
721 unsigned int j;
722 for (j = 0; j < 3; j++) {
723 unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
724 if (swz != RC_SWIZZLE_UNUSED) {
725 pair_inst->Alpha.Arg[i].Swizzle =
726 rc_init_swizzle(swz, 1);
727 break;
728 }
729 }
730 }
731 pair_inst->RGB.Opcode = RC_OPCODE_NOP;
732 pair_inst->RGB.DestIndex = 0;
733 pair_inst->RGB.WriteMask = 0;
734 pair_inst->RGB.Target = 0;
735 pair_inst->RGB.OutputWriteMask = 0;
736 pair_inst->RGB.DepthWriteMask = 0;
737 pair_inst->RGB.Saturate = 0;
738 memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
739
740 for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
741 struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
742 rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
743 RC_FILE_TEMPORARY, old_swz, new_index);
744 }
745 return 1;
746 }
747
748 /**
749 * Find a good ALU instruction or pair of ALU instruction and emit it.
750 *
751 * Prefer emitting full ALU instructions, so that when we reach a point
752 * where no full ALU instruction can be emitted, we have more candidates
753 * for RGB/Alpha pairing.
754 */
755 static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
756 {
757 struct schedule_instruction * sinst;
758
759 if (s->ReadyFullALU) {
760 sinst = s->ReadyFullALU;
761 s->ReadyFullALU = s->ReadyFullALU->NextReady;
762 rc_insert_instruction(before->Prev, sinst->Instruction);
763 commit_alu_instruction(s, sinst);
764 } else {
765 struct schedule_instruction **prgb;
766 struct schedule_instruction **palpha;
767 struct schedule_instruction *prev;
768 pair:
769 /* Some pairings might fail because they require too
770 * many source slots; try all possible pairings if necessary */
771 for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
772 for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
773 struct schedule_instruction * psirgb = *prgb;
774 struct schedule_instruction * psialpha = *palpha;
775
776 if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
777 continue;
778
779 *prgb = (*prgb)->NextReady;
780 *palpha = (*palpha)->NextReady;
781 rc_insert_instruction(before->Prev, psirgb->Instruction);
782 commit_alu_instruction(s, psirgb);
783 commit_alu_instruction(s, psialpha);
784 goto success;
785 }
786 }
787 prev = NULL;
788 /* No success in pairing, now try to convert one of the RGB
789 * instructions to an Alpha so we can pair it with another RGB.
790 */
791 if (s->ReadyRGB && s->ReadyRGB->NextReady) {
792 for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
793 if ((*prgb)->NumWriteValues == 1) {
794 struct schedule_instruction * prgb_next;
795 if (!convert_rgb_to_alpha(s, *prgb))
796 goto cont_loop;
797 prgb_next = (*prgb)->NextReady;
798 /* Add instruction to the Alpha ready list. */
799 (*prgb)->NextReady = s->ReadyAlpha;
800 s->ReadyAlpha = *prgb;
801 /* Remove instruction from the RGB ready list.*/
802 if (prev)
803 prev->NextReady = prgb_next;
804 else
805 s->ReadyRGB = prgb_next;
806 goto pair;
807 }
808 cont_loop:
809 prev = *prgb;
810 }
811 }
812 /* Still no success in pairing, just take the first RGB
813 * or alpha instruction. */
814 if (s->ReadyRGB) {
815 sinst = s->ReadyRGB;
816 s->ReadyRGB = s->ReadyRGB->NextReady;
817 } else if (s->ReadyAlpha) {
818 sinst = s->ReadyAlpha;
819 s->ReadyAlpha = s->ReadyAlpha->NextReady;
820 } else {
821 /*XXX Something real bad has happened. */
822 assert(0);
823 }
824
825 rc_insert_instruction(before->Prev, sinst->Instruction);
826 commit_alu_instruction(s, sinst);
827 success: ;
828 }
829 /* If the instruction we just emitted uses a presubtract value, and
830 * the presubtract sources were written by the previous intstruction,
831 * the previous instruction needs a nop. */
832 presub_nop(before->Prev);
833 }
834
835 static void scan_read(void * data, struct rc_instruction * inst,
836 rc_register_file file, unsigned int index, unsigned int chan)
837 {
838 struct schedule_state * s = data;
839 struct reg_value ** v = get_reg_valuep(s, file, index, chan);
840 struct reg_value_reader * reader;
841
842 if (!v)
843 return;
844
845 if (*v && (*v)->Writer == s->Current) {
846 /* The instruction reads and writes to a register component.
847 * In this case, we only want to increment dependencies by one.
848 */
849 return;
850 }
851
852 DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
853
854 reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
855 reader->Reader = s->Current;
856 if (!*v) {
857 /* In this situation, the instruction reads from a register
858 * that hasn't been written to or read from in the current
859 * block. */
860 *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
861 memset(*v, 0, sizeof(struct reg_value));
862 (*v)->Readers = reader;
863 } else {
864 reader->Next = (*v)->Readers;
865 (*v)->Readers = reader;
866 /* Only update the current instruction's dependencies if the
867 * register it reads from has been written to in this block. */
868 if ((*v)->Writer) {
869 s->Current->NumDependencies++;
870 }
871 }
872 (*v)->NumReaders++;
873
874 if (s->Current->NumReadValues >= 12) {
875 rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
876 } else {
877 s->Current->ReadValues[s->Current->NumReadValues++] = *v;
878 }
879 }
880
881 static void scan_write(void * data, struct rc_instruction * inst,
882 rc_register_file file, unsigned int index, unsigned int chan)
883 {
884 struct schedule_state * s = data;
885 struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
886 struct reg_value * newv;
887
888 if (!pv)
889 return;
890
891 DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
892
893 newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
894 memset(newv, 0, sizeof(*newv));
895
896 newv->Writer = s->Current;
897
898 if (*pv) {
899 (*pv)->Next = newv;
900 s->Current->NumDependencies++;
901 }
902
903 *pv = newv;
904
905 if (s->Current->NumWriteValues >= 4) {
906 rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
907 } else {
908 s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
909 }
910 }
911
912 static void is_rgb_to_alpha_possible_normal(
913 void * userdata,
914 struct rc_instruction * inst,
915 struct rc_src_register * src)
916 {
917 struct rc_reader_data * reader_data = userdata;
918 reader_data->Abort = 1;
919
920 }
921
922 static void schedule_block(struct r300_fragment_program_compiler * c,
923 struct rc_instruction * begin, struct rc_instruction * end)
924 {
925 struct schedule_state s;
926 unsigned int ip;
927
928 memset(&s, 0, sizeof(s));
929 s.C = &c->Base;
930
931 /* Scan instructions for data dependencies */
932 ip = 0;
933 for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
934 s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
935 memset(s.Current, 0, sizeof(struct schedule_instruction));
936
937 s.Current->Instruction = inst;
938 inst->IP = ip++;
939
940 DBG("%i: Scanning\n", inst->IP);
941
942 /* The order of things here is subtle and maybe slightly
943 * counter-intuitive, to account for the case where an
944 * instruction writes to the same register as it reads
945 * from. */
946 rc_for_all_writes_chan(inst, &scan_write, &s);
947 rc_for_all_reads_chan(inst, &scan_read, &s);
948
949 DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
950
951 if (!s.Current->NumDependencies)
952 instruction_ready(&s, s.Current);
953
954 /* Get global readers for possible RGB->Alpha conversion. */
955 s.Current->GlobalReaders.ExitOnAbort = 1;
956 rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
957 is_rgb_to_alpha_possible_normal,
958 is_rgb_to_alpha_possible, NULL);
959 }
960
961 /* Temporarily unlink all instructions */
962 begin->Prev->Next = end;
963 end->Prev = begin->Prev;
964
965 /* Schedule instructions back */
966 while(!s.C->Error &&
967 (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
968 if (s.ReadyTEX)
969 emit_all_tex(&s, end);
970
971 while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
972 emit_one_alu(&s, end);
973 }
974 }
975
976 static int is_controlflow(struct rc_instruction * inst)
977 {
978 if (inst->Type == RC_INSTRUCTION_NORMAL) {
979 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
980 return opcode->IsFlowControl;
981 }
982 return 0;
983 }
984
985 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
986 {
987 struct schedule_state s;
988
989 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
990 struct rc_instruction * inst = c->Base.Program.Instructions.Next;
991
992 memset(&s, 0, sizeof(s));
993 s.C = &c->Base;
994 while(inst != &c->Base.Program.Instructions) {
995 struct rc_instruction * first;
996
997 if (is_controlflow(inst)) {
998 inst = inst->Next;
999 continue;
1000 }
1001
1002 first = inst;
1003
1004 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1005 inst = inst->Next;
1006
1007 DBG("Schedule one block\n");
1008 schedule_block(c, first, inst);
1009 }
1010 }