r300/compiler: Move declaration before code.
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_pair_schedule.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_program_pair.h"
29
30 #include <stdio.h>
31
32 #include "radeon_compiler.h"
33 #include "radeon_dataflow.h"
34
35
/* Compile-time switch for scheduler tracing; set to 1 to enable DBG output. */
#define VERBOSE 0

/*
 * printf-style trace helper. Expands to a single statement and writes to
 * stderr only when VERBOSE is non-zero; the arguments are not evaluated
 * otherwise.
 */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while(0)
39
/**
 * Scheduler bookkeeping attached to one program instruction.
 */
struct schedule_instruction {
	/** The program instruction described by this entry. */
	struct rc_instruction *Instruction;

	/** Following entry in the ready list that currently holds this one. */
	struct schedule_instruction *NextReady;

	/** Register values written and read by this instruction. */
	struct reg_value *WriteValues[4];
	struct reg_value *ReadValues[12];
	unsigned int NumWriteValues:3;
	unsigned int NumReadValues:4;

	/**
	 * Count of outstanding (read and write) dependencies. The instruction
	 * becomes schedulable once this drops to zero.
	 */
	unsigned int NumDependencies:5;
};
58
59
/**
 * One node in the list of instructions that read a particular value.
 */
struct reg_value_reader {
	/** The instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Next reader of the same value, or null at the end of the list. */
	struct reg_value_reader *Next;
};
67
/**
 * Describes one value stored in a single component of an
 * RC_FILE_TEMPORARY register.
 */
struct reg_value {
	/** The instruction that produces this value. */
	struct schedule_instruction *Writer;

	/**
	 * Unordered linked list of the instructions reading this value.
	 * When the writer is committed, the dependency count of every
	 * reader on this list is decremented.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers still outstanding. It is decremented each time
	 * a reader of the value is committed. When it reaches zero, the
	 * dependency count of the instruction writing \ref Next is
	 * decremented.
	 */
	unsigned int NumReaders;

	/** The next value to be written to the same register component. */
	struct reg_value *Next;
};
92
/**
 * Per-register tracking: one value slot for each of the four components.
 */
struct register_state {
	struct reg_value *Values[4];
};
96
97 struct schedule_state {
98 struct radeon_compiler * C;
99 struct schedule_instruction * Current;
100
101 struct register_state Temporary[RC_REGISTER_MAX_INDEX];
102
103 /**
104 * Linked lists of instructions that can be scheduled right now,
105 * based on which ALU/TEX resources they require.
106 */
107 /*@{*/
108 struct schedule_instruction *ReadyFullALU;
109 struct schedule_instruction *ReadyRGB;
110 struct schedule_instruction *ReadyAlpha;
111 struct schedule_instruction *ReadyTEX;
112 /*@}*/
113 };
114
115 static struct reg_value ** get_reg_valuep(struct schedule_state * s,
116 rc_register_file file, unsigned int index, unsigned int chan)
117 {
118 if (file != RC_FILE_TEMPORARY)
119 return 0;
120
121 if (index >= RC_REGISTER_MAX_INDEX) {
122 rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
123 return 0;
124 }
125
126 return &s->Temporary[index].Values[chan];
127 }
128
129 static struct reg_value * get_reg_value(struct schedule_state * s,
130 rc_register_file file, unsigned int index, unsigned int chan)
131 {
132 struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
133 if (!pv)
134 return 0;
135 return *pv;
136 }
137
138 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
139 {
140 inst->NextReady = *list;
141 *list = inst;
142 }
143
144 static void add_inst_to_list_end(struct schedule_instruction ** list,
145 struct schedule_instruction * inst)
146 {
147 if(!*list){
148 *list = inst;
149 }else{
150 struct schedule_instruction * temp = *list;
151 while(temp->NextReady){
152 temp = temp->NextReady;
153 }
154 temp->NextReady = inst;
155 }
156 }
157
158 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
159 {
160 DBG("%i is now ready\n", sinst->Instruction->IP);
161
162 /* Adding Ready TEX instructions to the end of the "Ready List" helps
163 * us emit TEX instructions in blocks without losing our place. */
164 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
165 add_inst_to_list_end(&s->ReadyTEX, sinst);
166 else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
167 add_inst_to_list(&s->ReadyRGB, sinst);
168 else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
169 add_inst_to_list(&s->ReadyAlpha, sinst);
170 else
171 add_inst_to_list(&s->ReadyFullALU, sinst);
172 }
173
174 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
175 {
176 assert(sinst->NumDependencies > 0);
177 sinst->NumDependencies--;
178 if (!sinst->NumDependencies)
179 instruction_ready(s, sinst);
180 }
181
182 /**
183 * This function decreases the dependencies of the next instruction that
184 * wants to write to each of sinst's read values.
185 */
186 static void commit_update_reads(struct schedule_state * s,
187 struct schedule_instruction * sinst){
188 unsigned int i;
189 for(i = 0; i < sinst->NumReadValues; ++i) {
190 struct reg_value * v = sinst->ReadValues[i];
191 assert(v->NumReaders > 0);
192 v->NumReaders--;
193 if (!v->NumReaders) {
194 if (v->Next)
195 decrease_dependencies(s, v->Next->Writer);
196 }
197 }
198 }
199
200 static void commit_update_writes(struct schedule_state * s,
201 struct schedule_instruction * sinst){
202 unsigned int i;
203 for(i = 0; i < sinst->NumWriteValues; ++i) {
204 struct reg_value * v = sinst->WriteValues[i];
205 if (v->NumReaders) {
206 for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
207 decrease_dependencies(s, r->Reader);
208 }
209 } else {
210 /* This happens in instruction sequences of the type
211 * OP r.x, ...;
212 * OP r.x, r.x, ...;
213 * See also the subtlety in how instructions that both
214 * read and write the same register are scanned.
215 */
216 if (v->Next)
217 decrease_dependencies(s, v->Next->Writer);
218 }
219 }
220 }
221
222 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
223 {
224 DBG("%i: commit\n", sinst->Instruction->IP);
225
226 commit_update_reads(s, sinst);
227
228 commit_update_writes(s, sinst);
229 }
230
231 /**
232 * Emit all ready texture instructions in a single block.
233 *
234 * Emit as a single block to (hopefully) sample many textures in parallel,
235 * and to avoid hardware indirections on R300.
236 */
237 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
238 {
239 struct schedule_instruction *readytex;
240 struct rc_instruction * inst_begin;
241
242 assert(s->ReadyTEX);
243
244 /* Node marker for R300 */
245 inst_begin = rc_insert_new_instruction(s->C, before->Prev);
246 inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
247
248 /* Link texture instructions back in */
249 readytex = s->ReadyTEX;
250 while(readytex) {
251 rc_insert_instruction(before->Prev, readytex->Instruction);
252 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
253
254 /* All of the TEX instructions in the same TEX block have
255 * their source registers read from before any of the
256 * instructions in that block write to their destination
257 * registers. This means that when we commit a TEX
258 * instruction, any other TEX instruction that wants to write
259 * to one of the committed instruction's source register can be
260 * marked as ready and should be emitted in the same TEX
261 * block. This prevents the following sequence from being
262 * emitted in two different TEX blocks:
263 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
264 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
265 */
266 commit_update_reads(s, readytex);
267 readytex = readytex->NextReady;
268 }
269 readytex = s->ReadyTEX;
270 s->ReadyTEX = 0;
271 while(readytex){
272 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
273 commit_update_writes(s, readytex);
274 readytex = readytex->NextReady;
275 }
276 }
277
278 /* This is a helper function for destructive_merge_instructions(). It helps
279 * merge presubtract sources from two instructions and makes sure the
280 * presubtract sources end up in the correct spot. This function assumes that
281 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
282 * but no scalar instruction (alpha).
283 * @return 0 if merging the presubtract sources fails.
284 * @retrun 1 if merging the presubtract sources succeeds.
285 */
286 static int merge_presub_sources(
287 struct rc_pair_instruction * dst_full,
288 struct rc_pair_sub_instruction src,
289 unsigned int type)
290 {
291 unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
292 struct rc_pair_sub_instruction * dst_sub;
293 const struct rc_opcode_info * info;
294
295 assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
296
297 switch(type) {
298 case RC_PAIR_SOURCE_RGB:
299 is_rgb = 1;
300 is_alpha = 0;
301 dst_sub = &dst_full->RGB;
302 break;
303 case RC_PAIR_SOURCE_ALPHA:
304 is_rgb = 0;
305 is_alpha = 1;
306 dst_sub = &dst_full->Alpha;
307 break;
308 default:
309 assert(0);
310 return 0;
311 }
312
313 info = rc_get_opcode_info(dst_full->RGB.Opcode);
314
315 if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
316 return 0;
317
318 srcp_regs = rc_presubtract_src_reg_count(
319 src.Src[RC_PAIR_PRESUB_SRC].Index);
320 for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
321 unsigned int arg;
322 int free_source;
323 unsigned int one_way = 0;
324 struct rc_pair_instruction_source srcp = src.Src[srcp_src];
325 struct rc_pair_instruction_source temp;
326
327 free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
328 srcp.File, srcp.Index);
329
330 /* If free_source < 0 then there are no free source
331 * slots. */
332 if (free_source < 0)
333 return 0;
334
335 temp = dst_sub->Src[srcp_src];
336 dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
337
338 /* srcp needs src0 and src1 to be the same */
339 if (free_source < srcp_src) {
340 if (!temp.Used)
341 continue;
342 free_source = rc_pair_alloc_source(dst_full, is_rgb,
343 is_alpha, temp.File, temp.Index);
344 one_way = 1;
345 } else {
346 dst_sub->Src[free_source] = temp;
347 }
348
349 /* If free_source == srcp_src, then the presubtract
350 * source is already in the correct place. */
351 if (free_source == srcp_src)
352 continue;
353
354 /* Shuffle the sources, so we can put the
355 * presubtract source in the correct place. */
356 for(arg = 0; arg < info->NumSrcRegs; arg++) {
357 /*If this arg does not read from an rgb source,
358 * do nothing. */
359 if (!(rc_source_type_that_arg_reads(
360 dst_full->RGB.Arg[arg].Source,
361 dst_full->RGB.Arg[arg].Swizzle) & type)) {
362 continue;
363 }
364 if (dst_full->RGB.Arg[arg].Source == srcp_src)
365 dst_full->RGB.Arg[arg].Source = free_source;
366 /* We need to do this just in case register
367 * is one of the sources already, but in the
368 * wrong spot. */
369 else if(dst_full->RGB.Arg[arg].Source == free_source
370 && !one_way) {
371 dst_full->RGB.Arg[arg].Source = srcp_src;
372 }
373 }
374 }
375 return 1;
376 }
377
378
379 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
380 static int destructive_merge_instructions(
381 struct rc_pair_instruction * rgb,
382 struct rc_pair_instruction * alpha)
383 {
384 const struct rc_opcode_info * opcode;
385
386 assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
387 assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
388
389 /* Presubtract registers need to be merged first so that registers
390 * needed by the presubtract operation can be placed in src0 and/or
391 * src1. */
392
393 /* Merge the rgb presubtract registers. */
394 if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
395 if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) {
396 return 0;
397 }
398 }
399 /* Merge the alpha presubtract registers */
400 if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
401 if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){
402 return 0;
403 }
404 }
405
406 /* Copy alpha args into rgb */
407 opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
408
409 for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
410 unsigned int srcrgb = 0;
411 unsigned int srcalpha = 0;
412 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
413 rc_register_file file = 0;
414 unsigned int index = 0;
415 int source;
416
417 if (alpha->Alpha.Arg[arg].Swizzle < 3) {
418 srcrgb = 1;
419 file = alpha->RGB.Src[oldsrc].File;
420 index = alpha->RGB.Src[oldsrc].Index;
421 } else if (alpha->Alpha.Arg[arg].Swizzle < 4) {
422 srcalpha = 1;
423 file = alpha->Alpha.Src[oldsrc].File;
424 index = alpha->Alpha.Src[oldsrc].Index;
425 }
426
427 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
428 if (source < 0)
429 return 0;
430
431 rgb->Alpha.Arg[arg].Source = source;
432 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
433 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
434 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
435 }
436
437 /* Copy alpha opcode into rgb */
438 rgb->Alpha.Opcode = alpha->Alpha.Opcode;
439 rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
440 rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
441 rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
442 rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
443 rgb->Alpha.Saturate = alpha->Alpha.Saturate;
444
445 /* Merge ALU result writing */
446 if (alpha->WriteALUResult) {
447 if (rgb->WriteALUResult)
448 return 0;
449
450 rgb->WriteALUResult = alpha->WriteALUResult;
451 rgb->ALUResultCompare = alpha->ALUResultCompare;
452 }
453
454 return 1;
455 }
456
457 /**
458 * Try to merge the given instructions into the rgb instructions.
459 *
460 * Return true on success; on failure, return false, and keep
461 * the instructions untouched.
462 */
463 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
464 {
465 struct rc_pair_instruction backup;
466
467 /*Instructions can't write output registers and ALU result at the
468 * same time. */
469 if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
470 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
471 return 0;
472 }
473 memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
474
475 if (destructive_merge_instructions(rgb, alpha))
476 return 1;
477
478 memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
479 return 0;
480 }
481
482 static void presub_nop(struct rc_instruction * emitted) {
483 int prev_rgb_index, prev_alpha_index, i, num_src;
484
485 /* We don't need a nop if the previous instruction is a TEX. */
486 if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
487 return;
488 }
489 if (emitted->Prev->U.P.RGB.WriteMask)
490 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
491 else
492 prev_rgb_index = -1;
493 if (emitted->Prev->U.P.Alpha.WriteMask)
494 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
495 else
496 prev_alpha_index = 1;
497
498 /* Check the previous rgb instruction */
499 if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
500 num_src = rc_presubtract_src_reg_count(
501 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
502 for (i = 0; i < num_src; i++) {
503 unsigned int index = emitted->U.P.RGB.Src[i].Index;
504 if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
505 && (index == prev_rgb_index
506 || index == prev_alpha_index)) {
507 emitted->Prev->U.P.Nop = 1;
508 return;
509 }
510 }
511 }
512
513 /* Check the previous alpha instruction. */
514 if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
515 return;
516
517 num_src = rc_presubtract_src_reg_count(
518 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
519 for (i = 0; i < num_src; i++) {
520 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
521 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
522 && (index == prev_rgb_index || index == prev_alpha_index)) {
523 emitted->Prev->U.P.Nop = 1;
524 return;
525 }
526 }
527 }
528 /**
529 * Find a good ALU instruction or pair of ALU instruction and emit it.
530 *
531 * Prefer emitting full ALU instructions, so that when we reach a point
532 * where no full ALU instruction can be emitted, we have more candidates
533 * for RGB/Alpha pairing.
534 */
535 static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
536 {
537 struct schedule_instruction * sinst;
538
539 if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
540 if (s->ReadyFullALU) {
541 sinst = s->ReadyFullALU;
542 s->ReadyFullALU = s->ReadyFullALU->NextReady;
543 } else if (s->ReadyRGB) {
544 sinst = s->ReadyRGB;
545 s->ReadyRGB = s->ReadyRGB->NextReady;
546 } else {
547 sinst = s->ReadyAlpha;
548 s->ReadyAlpha = s->ReadyAlpha->NextReady;
549 }
550
551 rc_insert_instruction(before->Prev, sinst->Instruction);
552 commit_alu_instruction(s, sinst);
553 } else {
554 struct schedule_instruction **prgb;
555 struct schedule_instruction **palpha;
556
557 /* Some pairings might fail because they require too
558 * many source slots; try all possible pairings if necessary */
559 for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
560 for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
561 struct schedule_instruction * psirgb = *prgb;
562 struct schedule_instruction * psialpha = *palpha;
563
564 if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
565 continue;
566
567 *prgb = (*prgb)->NextReady;
568 *palpha = (*palpha)->NextReady;
569 rc_insert_instruction(before->Prev, psirgb->Instruction);
570 commit_alu_instruction(s, psirgb);
571 commit_alu_instruction(s, psialpha);
572 goto success;
573 }
574 }
575
576 /* No success in pairing; just take the first RGB instruction */
577 sinst = s->ReadyRGB;
578 s->ReadyRGB = s->ReadyRGB->NextReady;
579
580 rc_insert_instruction(before->Prev, sinst->Instruction);
581 commit_alu_instruction(s, sinst);
582 success: ;
583 }
584 /* If the instruction we just emitted uses a presubtract value, and
585 * the presubtract sources were written by the previous intstruction,
586 * the previous instruction needs a nop. */
587 presub_nop(before->Prev);
588 }
589
590 static void scan_read(void * data, struct rc_instruction * inst,
591 rc_register_file file, unsigned int index, unsigned int chan)
592 {
593 struct schedule_state * s = data;
594 struct reg_value * v = get_reg_value(s, file, index, chan);
595 struct reg_value_reader * reader;
596
597 if (!v)
598 return;
599
600 if (v->Writer == s->Current) {
601 /* The instruction reads and writes to a register component.
602 * In this case, we only want to increment dependencies by one.
603 */
604 return;
605 }
606
607 DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
608
609 reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
610 reader->Reader = s->Current;
611 reader->Next = v->Readers;
612 v->Readers = reader;
613 v->NumReaders++;
614
615 s->Current->NumDependencies++;
616
617 if (s->Current->NumReadValues >= 12) {
618 rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
619 } else {
620 s->Current->ReadValues[s->Current->NumReadValues++] = v;
621 }
622 }
623
624 static void scan_write(void * data, struct rc_instruction * inst,
625 rc_register_file file, unsigned int index, unsigned int chan)
626 {
627 struct schedule_state * s = data;
628 struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
629 struct reg_value * newv;
630
631 if (!pv)
632 return;
633
634 DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
635
636 newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
637 memset(newv, 0, sizeof(*newv));
638
639 newv->Writer = s->Current;
640
641 if (*pv) {
642 (*pv)->Next = newv;
643 s->Current->NumDependencies++;
644 }
645
646 *pv = newv;
647
648 if (s->Current->NumWriteValues >= 4) {
649 rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
650 } else {
651 s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
652 }
653 }
654
655 static void schedule_block(struct r300_fragment_program_compiler * c,
656 struct rc_instruction * begin, struct rc_instruction * end)
657 {
658 struct schedule_state s;
659 unsigned int ip;
660
661 memset(&s, 0, sizeof(s));
662 s.C = &c->Base;
663
664 /* Scan instructions for data dependencies */
665 ip = 0;
666 for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
667 s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
668 memset(s.Current, 0, sizeof(struct schedule_instruction));
669
670 s.Current->Instruction = inst;
671 inst->IP = ip++;
672
673 DBG("%i: Scanning\n", inst->IP);
674
675 /* The order of things here is subtle and maybe slightly
676 * counter-intuitive, to account for the case where an
677 * instruction writes to the same register as it reads
678 * from. */
679 rc_for_all_writes_chan(inst, &scan_write, &s);
680 rc_for_all_reads_chan(inst, &scan_read, &s);
681
682 DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
683
684 if (!s.Current->NumDependencies)
685 instruction_ready(&s, s.Current);
686 }
687
688 /* Temporarily unlink all instructions */
689 begin->Prev->Next = end;
690 end->Prev = begin->Prev;
691
692 /* Schedule instructions back */
693 while(!s.C->Error &&
694 (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
695 if (s.ReadyTEX)
696 emit_all_tex(&s, end);
697
698 while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
699 emit_one_alu(&s, end);
700 }
701 }
702
703 static int is_controlflow(struct rc_instruction * inst)
704 {
705 if (inst->Type == RC_INSTRUCTION_NORMAL) {
706 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
707 return opcode->IsFlowControl;
708 }
709 return 0;
710 }
711
712 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
713 {
714 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
715 struct rc_instruction * inst = c->Base.Program.Instructions.Next;
716 while(inst != &c->Base.Program.Instructions) {
717 struct rc_instruction * first;
718
719 if (is_controlflow(inst)) {
720 inst = inst->Next;
721 continue;
722 }
723
724 first = inst;
725
726 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
727 inst = inst->Next;
728
729 DBG("Schedule one block\n");
730 schedule_block(c, first, inst);
731 }
732 }