Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r300 / radeon_program_pair.c
1 /*
2 * Copyright (C) 2008 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * @file
30 *
31 * Perform temporary register allocation and attempt to pair off instructions
32 * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
33 * vs. ALU instruction scheduling.
34 */
35
36 #include "radeon_program_pair.h"
37
38 #include "radeon_context.h"
39
40 #include "shader/prog_print.h"
41
/**
 * Report a problem through _mesa_problem() and mark the pairing pass as
 * failed.
 *
 * The expansion dereferences a variable named `s` (it reads s->Ctx and writes
 * s->Error), so the macro can only be used where such a pointer is in scope.
 * A trailing "\n" is appended to the format string by the macro itself.
 */
#define error(fmt, args...) do { \
		_mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##args); \
		s->Error = GL_TRUE; \
	} while(0)
47
/**
 * Scheduling information kept for one program instruction.
 *
 * Entries are stored in the pair_state Instructions array and are addressed
 * by the same index (IP) as the corresponding program instruction.
 */
struct pair_state_instruction {
	GLuint IsTex:1; /**< Is a texture instruction */
	GLuint IsOutput:1; /**< Is output instruction */
	GLuint NeedRGB:1; /**< Needs the RGB ALU */
	GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
	GLuint IsTranscendent:1; /**< Is a special transcendent instruction */

	/**
	 * Number of (read and write) dependencies that must be resolved before
	 * this instruction can be scheduled.
	 *
	 * The field is 5 bits wide, so at most 31 outstanding dependencies
	 * can be represented.
	 */
	GLuint NumDependencies:5;

	/**
	 * Next instruction in whichever linked list (one of the ready lists or
	 * the deferred list) this instruction is currently linked into.
	 */
	struct pair_state_instruction *NextReady;

	/**
	 * Values that this instruction writes, one slot per register component.
	 */
	struct reg_value *Values[4];
};
71
72
/**
 * One node in the list of instructions that read a particular value.
 *
 * Nodes are chained through @ref Next; the head of the chain is stored in
 * the Readers member of the value being read.
 */
struct reg_value_reader {
	GLuint IP; /**< IP of the instruction that performs this access */
	struct reg_value_reader *Next; /**< Next reader of the same value, or null */
};
80
/**
 * Describes one value stored in a single component of a
 * PROGRAM_TEMPORARY, i.e. the result of one particular write to that
 * component.
 */
struct reg_value {
	GLuint IP; /**< IP of the instruction that writes this value */
	struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */

	/**
	 * Unordered linked list of instructions that read from this value.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is calculated during @ref scan_instructions
	 * and continually decremented during code emission.
	 * When this count reaches zero, the instruction that writes the @ref Next value
	 * can be scheduled.
	 */
	GLuint NumReaders;
};
102
/**
 * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
 * to the proper hardware temporary.
 */
struct pair_register_translation {
	GLuint Allocated:1; /**< Set once a hardware temporary has been assigned (see alloc_hw_reg) */
	GLuint HwIndex:8; /**< Index of the assigned hardware temporary; meaningful only when Allocated is set */
	GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */

	/**
	 * Notes the value that is currently contained in each component
	 * (only used for PROGRAM_TEMPORARY registers).
	 */
	struct reg_value *Value[4];
};
118
/**
 * Complete working state of one run of the pairing pass.
 */
struct pair_state {
	GLcontext *Ctx; /**< Context used when reporting problems */
	struct gl_program *Program; /**< Program being scheduled; its instructions are rewritten in place */
	const struct radeon_pair_handler *Handler; /**< Callbacks that receive the emitted instructions */
	GLboolean Error; /**< Set as soon as any step fails */
	GLboolean Debug; /**< Print the emitted instructions */
	GLboolean Verbose; /**< Print additional scheduling traces */
	void *UserData; /**< Opaque pointer handed back to every Handler callback */
	GLubyte NumKillInsts; /**< KIL instructions not yet emitted; output instructions are deferred while this is non-zero */

	/**
	 * Translate Mesa registers to hardware registers
	 */
	struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
	struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];

	/**
	 * Derived information about program instructions.
	 */
	struct pair_state_instruction *Instructions;

	struct {
		GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
	} HwTemps[128];

	/**
	 * Linked list of instructions that can be scheduled right now,
	 * based on which ALU/TEX resources they require.
	 */
	struct pair_state_instruction *ReadyFullALU;
	struct pair_state_instruction *ReadyRGB;
	struct pair_state_instruction *ReadyAlpha;
	struct pair_state_instruction *ReadyTEX;

	/**
	 * Linked list of deferred instructions: output instructions that became
	 * ready while KIL instructions were still outstanding.
	 */
	struct pair_state_instruction *DeferredInsts;

	/**
	 * Pool of @ref reg_value structures for fast allocation, together
	 * with the number of entries already handed out.
	 */
	struct reg_value *ValuePool;
	GLuint ValuePoolUsed;

	/**
	 * Pool of @ref reg_value_reader structures, together with the number
	 * of entries already handed out.
	 */
	struct reg_value_reader *ReaderPool;
	GLuint ReaderPoolUsed;
};
166
167
168 static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index)
169 {
170 switch(file) {
171 case PROGRAM_TEMPORARY: return &s->Temps[index];
172 case PROGRAM_INPUT: return &s->Inputs[index];
173 default: return 0;
174 }
175 }
176
177 static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex)
178 {
179 struct pair_register_translation *t = get_register(s, file, index);
180 ASSERT(!s->HwTemps[hwindex].RefCount);
181 ASSERT(!t->Allocated);
182 s->HwTemps[hwindex].RefCount = t->RefCount;
183 t->Allocated = 1;
184 t->HwIndex = hwindex;
185 }
186
187 static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
188 {
189 GLuint hwindex;
190
191 struct pair_register_translation *t = get_register(s, file, index);
192 if (!t) {
193 _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index);
194 return 0;
195 }
196
197 if (t->Allocated)
198 return t->HwIndex;
199
200 for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex)
201 if (!s->HwTemps[hwindex].RefCount)
202 break;
203
204 if (hwindex >= s->Handler->MaxHwTemps) {
205 error("Ran out of hardware temporaries");
206 return 0;
207 }
208
209 alloc_hw_reg(s, file, index, hwindex);
210 return hwindex;
211 }
212
213
214 static void deref_hw_reg(struct pair_state *s, GLuint hwindex)
215 {
216 if (!s->HwTemps[hwindex].RefCount) {
217 error("Hwindex %i refcount error", hwindex);
218 return;
219 }
220
221 s->HwTemps[hwindex].RefCount--;
222 }
223
224 static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst)
225 {
226 pairinst->NextReady = *list;
227 *list = pairinst;
228 }
229
230 /**
231 * The instruction at the given IP has become ready. Link it into the ready
232 * instructions.
233 */
234 static void instruction_ready(struct pair_state *s, int ip)
235 {
236 struct pair_state_instruction *pairinst = s->Instructions + ip;
237
238 if (s->Verbose)
239 _mesa_printf("instruction_ready(%i)\n", ip);
240
241 if (s->NumKillInsts > 0 && pairinst->IsOutput)
242 add_pairinst_to_list(&s->DeferredInsts, pairinst);
243 else if (pairinst->IsTex)
244 add_pairinst_to_list(&s->ReadyTEX, pairinst);
245 else if (!pairinst->NeedAlpha)
246 add_pairinst_to_list(&s->ReadyRGB, pairinst);
247 else if (!pairinst->NeedRGB)
248 add_pairinst_to_list(&s->ReadyAlpha, pairinst);
249 else
250 add_pairinst_to_list(&s->ReadyFullALU, pairinst);
251 }
252
253
254 /**
255 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
256 * and reverse the order of arguments for CMP.
257 */
258 static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
259 {
260 struct prog_src_register tmp;
261
262 switch(inst->Opcode) {
263 case OPCODE_ADD:
264 inst->SrcReg[2] = inst->SrcReg[1];
265 inst->SrcReg[1].File = PROGRAM_BUILTIN;
266 inst->SrcReg[1].Swizzle = SWIZZLE_1111;
267 inst->SrcReg[1].Negate = NEGATE_NONE;
268 inst->Opcode = OPCODE_MAD;
269 break;
270 case OPCODE_CMP:
271 tmp = inst->SrcReg[2];
272 inst->SrcReg[2] = inst->SrcReg[0];
273 inst->SrcReg[0] = tmp;
274 break;
275 case OPCODE_MOV:
276 /* AMD say we should use CMP.
277 * However, when we transform
278 * KIL -r0;
279 * into
280 * CMP tmp, -r0, -r0, 0;
281 * KIL tmp;
282 * we get incorrect behaviour on R500 when r0 == 0.0.
283 * It appears that the R500 KIL hardware treats -0.0 as less
284 * than zero.
285 */
286 inst->SrcReg[1].File = PROGRAM_BUILTIN;
287 inst->SrcReg[1].Swizzle = SWIZZLE_1111;
288 inst->SrcReg[2].File = PROGRAM_BUILTIN;
289 inst->SrcReg[2].Swizzle = SWIZZLE_0000;
290 inst->Opcode = OPCODE_MAD;
291 break;
292 case OPCODE_MUL:
293 inst->SrcReg[2].File = PROGRAM_BUILTIN;
294 inst->SrcReg[2].Swizzle = SWIZZLE_0000;
295 inst->Opcode = OPCODE_MAD;
296 break;
297 default:
298 /* nothing to do */
299 break;
300 }
301 }
302
303
304 /**
305 * Classify an instruction according to which ALUs etc. it needs
306 */
307 static void classify_instruction(struct pair_state *s,
308 struct prog_instruction *inst, struct pair_state_instruction *pairinst)
309 {
310 pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
311 pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
312
313 switch(inst->Opcode) {
314 case OPCODE_ADD:
315 case OPCODE_CMP:
316 case OPCODE_DDX:
317 case OPCODE_DDY:
318 case OPCODE_FRC:
319 case OPCODE_MAD:
320 case OPCODE_MAX:
321 case OPCODE_MIN:
322 case OPCODE_MOV:
323 case OPCODE_MUL:
324 break;
325 case OPCODE_COS:
326 case OPCODE_EX2:
327 case OPCODE_LG2:
328 case OPCODE_RCP:
329 case OPCODE_RSQ:
330 case OPCODE_SIN:
331 pairinst->IsTranscendent = 1;
332 pairinst->NeedAlpha = 1;
333 break;
334 case OPCODE_DP4:
335 pairinst->NeedAlpha = 1;
336 /* fall through */
337 case OPCODE_DP3:
338 pairinst->NeedRGB = 1;
339 break;
340 case OPCODE_KIL:
341 case OPCODE_TEX:
342 case OPCODE_TXB:
343 case OPCODE_TXP:
344 case OPCODE_END:
345 pairinst->IsTex = 1;
346 break;
347 default:
348 error("Unknown opcode %d\n", inst->Opcode);
349 break;
350 }
351
352 pairinst->IsOutput = (inst->DstReg.File == PROGRAM_OUTPUT);
353 }
354
355
356 /**
357 * Count which (input, temporary) register is read and written how often,
358 * and scan the instruction stream to find dependencies.
359 */
360 static void scan_instructions(struct pair_state *s)
361 {
362 struct prog_instruction *inst;
363 struct pair_state_instruction *pairinst;
364 GLuint ip;
365
366 for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
367 inst->Opcode != OPCODE_END;
368 ++inst, ++pairinst, ++ip) {
369 final_rewrite(s, inst);
370 classify_instruction(s, inst, pairinst);
371
372 int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
373 int j;
374 for(j = 0; j < nsrc; j++) {
375 struct pair_register_translation *t =
376 get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
377 if (!t)
378 continue;
379
380 t->RefCount++;
381
382 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
383 int i;
384 for(i = 0; i < 4; ++i) {
385 GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
386 if (swz >= 4)
387 continue; /* constant or NIL swizzle */
388 if (!t->Value[swz])
389 continue; /* this is an undefined read */
390
391 /* Do not add a dependency if this instruction
392 * also rewrites the value. The code below adds
393 * a dependency for the DstReg, which is a superset
394 * of the SrcReg dependency. */
395 if (inst->DstReg.File == PROGRAM_TEMPORARY &&
396 inst->DstReg.Index == inst->SrcReg[j].Index &&
397 GET_BIT(inst->DstReg.WriteMask, swz))
398 continue;
399
400 struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
401 pairinst->NumDependencies++;
402 t->Value[swz]->NumReaders++;
403 r->IP = ip;
404 r->Next = t->Value[swz]->Readers;
405 t->Value[swz]->Readers = r;
406 }
407 }
408 }
409
410 int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
411 if (ndst) {
412 struct pair_register_translation *t =
413 get_register(s, inst->DstReg.File, inst->DstReg.Index);
414 if (t) {
415 t->RefCount++;
416
417 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
418 int j;
419 for(j = 0; j < 4; ++j) {
420 if (!GET_BIT(inst->DstReg.WriteMask, j))
421 continue;
422
423 struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
424 v->IP = ip;
425 if (t->Value[j]) {
426 pairinst->NumDependencies++;
427 t->Value[j]->Next = v;
428 }
429 t->Value[j] = v;
430 pairinst->Values[j] = v;
431 }
432 }
433 }
434 }
435
436 if (s->Verbose)
437 _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);
438
439 if (!pairinst->NumDependencies)
440 instruction_ready(s, ip);
441 }
442
443 /* Clear the PROGRAM_TEMPORARY state */
444 int i, j;
445 for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
446 for(j = 0; j < 4; ++j)
447 s->Temps[i].Value[j] = 0;
448 }
449 }
450
451
452 /**
453 * Reserve hardware temporary registers for the program inputs.
454 *
455 * @note This allocation is performed explicitly, because the order of inputs
456 * is determined by the RS hardware.
457 */
458 static void allocate_input_registers(struct pair_state *s)
459 {
460 GLuint InputsRead = s->Program->InputsRead;
461 int i;
462 GLuint hwindex = 0;
463
464 /* Primary colour */
465 if (InputsRead & FRAG_BIT_COL0)
466 alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
467 InputsRead &= ~FRAG_BIT_COL0;
468
469 /* Secondary color */
470 if (InputsRead & FRAG_BIT_COL1)
471 alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
472 InputsRead &= ~FRAG_BIT_COL1;
473
474 /* Texcoords */
475 for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
476 if (InputsRead & (FRAG_BIT_TEX0 << i))
477 alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
478 }
479 InputsRead &= ~FRAG_BITS_TEX_ANY;
480
481 /* Fogcoords treated as a texcoord */
482 if (InputsRead & FRAG_BIT_FOGC)
483 alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++);
484 InputsRead &= ~FRAG_BIT_FOGC;
485
486 /* fragment position treated as a texcoord */
487 if (InputsRead & FRAG_BIT_WPOS)
488 alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
489 InputsRead &= ~FRAG_BIT_WPOS;
490
491 /* Anything else */
492 if (InputsRead)
493 error("Don't know how to handle inputs 0x%x\n", InputsRead);
494 }
495
496
497 static void decrement_dependencies(struct pair_state *s, int ip)
498 {
499 struct pair_state_instruction *pairinst = s->Instructions + ip;
500 ASSERT(pairinst->NumDependencies > 0);
501 if (!--pairinst->NumDependencies)
502 instruction_ready(s, ip);
503 }
504
505 /**
506 * Update the dependency tracking state based on what the instruction
507 * at the given IP does.
508 */
509 static void commit_instruction(struct pair_state *s, int ip)
510 {
511 struct prog_instruction *inst = s->Program->Instructions + ip;
512 struct pair_state_instruction *pairinst = s->Instructions + ip;
513
514 if (s->Verbose)
515 _mesa_printf("commit_instruction(%i)\n", ip);
516
517 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
518 struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
519 deref_hw_reg(s, t->HwIndex);
520
521 int i;
522 for(i = 0; i < 4; ++i) {
523 if (!GET_BIT(inst->DstReg.WriteMask, i))
524 continue;
525
526 t->Value[i] = pairinst->Values[i];
527 if (t->Value[i]->NumReaders) {
528 struct reg_value_reader *r;
529 for(r = pairinst->Values[i]->Readers; r; r = r->Next)
530 decrement_dependencies(s, r->IP);
531 } else if (t->Value[i]->Next) {
532 /* This happens when the only reader writes
533 * the register at the same time */
534 decrement_dependencies(s, t->Value[i]->Next->IP);
535 }
536 }
537 }
538
539 int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
540 int i;
541 for(i = 0; i < nsrc; i++) {
542 struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index);
543 if (!t)
544 continue;
545
546 deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index));
547
548 if (inst->SrcReg[i].File != PROGRAM_TEMPORARY)
549 continue;
550
551 int j;
552 for(j = 0; j < 4; ++j) {
553 GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
554 if (swz >= 4)
555 continue;
556 if (!t->Value[swz])
557 continue;
558
559 /* Do not free a dependency if this instruction
560 * also rewrites the value. See scan_instructions. */
561 if (inst->DstReg.File == PROGRAM_TEMPORARY &&
562 inst->DstReg.Index == inst->SrcReg[i].Index &&
563 GET_BIT(inst->DstReg.WriteMask, swz))
564 continue;
565
566 if (!--t->Value[swz]->NumReaders) {
567 if (t->Value[swz]->Next)
568 decrement_dependencies(s, t->Value[swz]->Next->IP);
569 }
570 }
571 }
572 }
573
574
575 /**
576 * Emit all ready texture instructions in a single block.
577 *
578 * Emit as a single block to (hopefully) sample many textures in parallel,
579 * and to avoid hardware indirections on R300.
580 *
581 * In R500, we don't really know when the result of a texture instruction
582 * arrives. So allocate all destinations first, to make sure they do not
583 * arrive early and overwrite a texture coordinate we're going to use later
584 * in the block.
585 */
586 static void emit_all_tex(struct pair_state *s)
587 {
588 struct pair_state_instruction *readytex;
589 struct pair_state_instruction *pairinst;
590
591 ASSERT(s->ReadyTEX);
592
593 // Don't let the ready list change under us!
594 readytex = s->ReadyTEX;
595 s->ReadyTEX = 0;
596
597 // Allocate destination hardware registers in one block to avoid conflicts.
598 for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
599 int ip = pairinst - s->Instructions;
600 struct prog_instruction *inst = s->Program->Instructions + ip;
601 if (inst->Opcode != OPCODE_KIL)
602 get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
603 }
604
605 if (s->Debug)
606 _mesa_printf(" BEGIN_TEX\n");
607
608 if (s->Handler->BeginTexBlock)
609 s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData);
610
611 for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
612 int ip = pairinst - s->Instructions;
613 struct prog_instruction *inst = s->Program->Instructions + ip;
614 commit_instruction(s, ip);
615
616 if (inst->Opcode == OPCODE_KIL)
617 --s->NumKillInsts;
618 else
619 inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
620
621 inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
622
623 if (s->Debug) {
624 _mesa_printf(" ");
625 _mesa_print_instruction(inst);
626 }
627 s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst);
628 }
629
630 if (s->Debug)
631 _mesa_printf(" END_TEX\n");
632 }
633
634
635 static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair,
636 struct prog_src_register src, GLboolean rgb, GLboolean alpha)
637 {
638 int candidate = -1;
639 int candidate_quality = -1;
640 int i;
641
642 if (!rgb && !alpha)
643 return 0;
644
645 GLuint constant;
646 GLuint index;
647
648 if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) {
649 constant = 0;
650 index = get_hw_reg(s, src.File, src.Index);
651 } else {
652 constant = 1;
653 s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index);
654 }
655
656 for(i = 0; i < 3; ++i) {
657 int q = 0;
658 if (rgb) {
659 if (pair->RGB.Src[i].Used) {
660 if (pair->RGB.Src[i].Constant != constant ||
661 pair->RGB.Src[i].Index != index)
662 continue;
663 q++;
664 }
665 }
666 if (alpha) {
667 if (pair->Alpha.Src[i].Used) {
668 if (pair->Alpha.Src[i].Constant != constant ||
669 pair->Alpha.Src[i].Index != index)
670 continue;
671 q++;
672 }
673 }
674 if (q > candidate_quality) {
675 candidate_quality = q;
676 candidate = i;
677 }
678 }
679
680 if (candidate >= 0) {
681 if (rgb) {
682 pair->RGB.Src[candidate].Used = 1;
683 pair->RGB.Src[candidate].Constant = constant;
684 pair->RGB.Src[candidate].Index = index;
685 }
686 if (alpha) {
687 pair->Alpha.Src[candidate].Used = 1;
688 pair->Alpha.Src[candidate].Constant = constant;
689 pair->Alpha.Src[candidate].Index = index;
690 }
691 }
692
693 return candidate;
694 }
695
696 /**
697 * Fill the given ALU instruction's opcodes and source operands into the given pair,
698 * if possible.
699 */
700 static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
701 {
702 struct pair_state_instruction *pairinst = s->Instructions + ip;
703 struct prog_instruction *inst = s->Program->Instructions + ip;
704
705 ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
706 ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);
707
708 if (pairinst->NeedRGB) {
709 if (pairinst->IsTranscendent)
710 pair->RGB.Opcode = OPCODE_REPL_ALPHA;
711 else
712 pair->RGB.Opcode = inst->Opcode;
713 if (inst->SaturateMode == SATURATE_ZERO_ONE)
714 pair->RGB.Saturate = 1;
715 }
716 if (pairinst->NeedAlpha) {
717 pair->Alpha.Opcode = inst->Opcode;
718 if (inst->SaturateMode == SATURATE_ZERO_ONE)
719 pair->Alpha.Saturate = 1;
720 }
721
722 int nargs = _mesa_num_inst_src_regs(inst->Opcode);
723 int i;
724
725 /* Special case for DDX/DDY (MDH/MDV). */
726 if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) {
727 if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used)
728 return GL_FALSE;
729 else
730 nargs++;
731 }
732
733 for(i = 0; i < nargs; ++i) {
734 int source;
735 if (pairinst->NeedRGB && !pairinst->IsTranscendent) {
736 GLboolean srcrgb = GL_FALSE;
737 GLboolean srcalpha = GL_FALSE;
738 int j;
739 for(j = 0; j < 3; ++j) {
740 GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
741 if (swz < 3)
742 srcrgb = GL_TRUE;
743 else if (swz < 4)
744 srcalpha = GL_TRUE;
745 }
746 source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
747 if (source < 0)
748 return GL_FALSE;
749 pair->RGB.Arg[i].Source = source;
750 pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
751 pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
752 pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z));
753 }
754 if (pairinst->NeedAlpha) {
755 GLboolean srcrgb = GL_FALSE;
756 GLboolean srcalpha = GL_FALSE;
757 GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3);
758 if (swz < 3)
759 srcrgb = GL_TRUE;
760 else if (swz < 4)
761 srcalpha = GL_TRUE;
762 source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
763 if (source < 0)
764 return GL_FALSE;
765 pair->Alpha.Arg[i].Source = source;
766 pair->Alpha.Arg[i].Swizzle = swz;
767 pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
768 pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W);
769 }
770 }
771
772 return GL_TRUE;
773 }
774
775
776 /**
777 * Fill in the destination register information.
778 *
779 * This is split from filling in source registers because we want
780 * to avoid allocating hardware temporaries for destinations until
781 * we are absolutely certain that we're going to emit a certain
782 * instruction pairing.
783 */
784 static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
785 {
786 struct pair_state_instruction *pairinst = s->Instructions + ip;
787 struct prog_instruction *inst = s->Program->Instructions + ip;
788
789 if (inst->DstReg.File == PROGRAM_OUTPUT) {
790 if (inst->DstReg.Index == FRAG_RESULT_COLOR) {
791 pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
792 pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
793 } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) {
794 pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
795 }
796 } else {
797 GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
798 if (pairinst->NeedRGB) {
799 pair->RGB.DestIndex = hwindex;
800 pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
801 }
802 if (pairinst->NeedAlpha) {
803 pair->Alpha.DestIndex = hwindex;
804 pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
805 }
806 }
807 }
808
809
810 /**
811 * Find a good ALU instruction or pair of ALU instruction and emit it.
812 *
813 * Prefer emitting full ALU instructions, so that when we reach a point
814 * where no full ALU instruction can be emitted, we have more candidates
815 * for RGB/Alpha pairing.
816 */
817 static void emit_alu(struct pair_state *s)
818 {
819 struct radeon_pair_instruction pair;
820
821 if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
822 int ip;
823 if (s->ReadyFullALU) {
824 ip = s->ReadyFullALU - s->Instructions;
825 s->ReadyFullALU = s->ReadyFullALU->NextReady;
826 } else if (s->ReadyRGB) {
827 ip = s->ReadyRGB - s->Instructions;
828 s->ReadyRGB = s->ReadyRGB->NextReady;
829 } else {
830 ip = s->ReadyAlpha - s->Instructions;
831 s->ReadyAlpha = s->ReadyAlpha->NextReady;
832 }
833
834 _mesa_bzero(&pair, sizeof(pair));
835 fill_instruction_into_pair(s, &pair, ip);
836 fill_dest_into_pair(s, &pair, ip);
837 commit_instruction(s, ip);
838 } else {
839 struct pair_state_instruction **prgb;
840 struct pair_state_instruction **palpha;
841
842 /* Some pairings might fail because they require too
843 * many source slots; try all possible pairings if necessary */
844 for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
845 for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
846 int rgbip = *prgb - s->Instructions;
847 int alphaip = *palpha - s->Instructions;
848 _mesa_bzero(&pair, sizeof(pair));
849 fill_instruction_into_pair(s, &pair, rgbip);
850 if (!fill_instruction_into_pair(s, &pair, alphaip))
851 continue;
852 *prgb = (*prgb)->NextReady;
853 *palpha = (*palpha)->NextReady;
854 fill_dest_into_pair(s, &pair, rgbip);
855 fill_dest_into_pair(s, &pair, alphaip);
856 commit_instruction(s, rgbip);
857 commit_instruction(s, alphaip);
858 goto success;
859 }
860 }
861
862 /* No success in pairing; just take the first RGB instruction */
863 int ip = s->ReadyRGB - s->Instructions;
864 s->ReadyRGB = s->ReadyRGB->NextReady;
865 _mesa_bzero(&pair, sizeof(pair));
866 fill_instruction_into_pair(s, &pair, ip);
867 fill_dest_into_pair(s, &pair, ip);
868 commit_instruction(s, ip);
869 success: ;
870 }
871
872 if (s->Debug)
873 radeonPrintPairInstruction(&pair);
874
875 s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);
876 }
877
878 static GLubyte countKillInsts(struct gl_program *prog)
879 {
880 GLubyte i, count = 0;
881
882 for (i = 0; i < prog->NumInstructions; ++i) {
883 if (prog->Instructions[i].Opcode == OPCODE_KIL)
884 ++count;
885 }
886
887 return count;
888 }
889
890 GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
891 const struct radeon_pair_handler* handler, void *userdata)
892 {
893 struct pair_state s;
894
895 _mesa_bzero(&s, sizeof(s));
896 s.Ctx = ctx;
897 s.Program = program;
898 s.Handler = handler;
899 s.UserData = userdata;
900 s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
901 s.Verbose = GL_FALSE && s.Debug;
902 s.NumKillInsts = countKillInsts(program);
903
904 s.Instructions = (struct pair_state_instruction*)_mesa_calloc(
905 sizeof(struct pair_state_instruction)*s.Program->NumInstructions);
906 s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4);
907 s.ReaderPool = (struct reg_value_reader*)_mesa_calloc(
908 sizeof(struct reg_value_reader)*s.Program->NumInstructions*12);
909
910 if (s.Debug)
911 _mesa_printf("Emit paired program\n");
912
913 scan_instructions(&s);
914 allocate_input_registers(&s);
915
916 while(!s.Error &&
917 (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
918 if (s.ReadyTEX)
919 emit_all_tex(&s);
920
921 if (!s.NumKillInsts) {
922 struct pair_state_instruction *pairinst = s.DeferredInsts;
923 while (pairinst) {
924 if (!pairinst->NeedAlpha)
925 add_pairinst_to_list(&s.ReadyRGB, pairinst);
926 else if (!pairinst->NeedRGB)
927 add_pairinst_to_list(&s.ReadyAlpha, pairinst);
928 else
929 add_pairinst_to_list(&s.ReadyFullALU, pairinst);
930
931 pairinst = pairinst->NextReady;
932 }
933 s.DeferredInsts = NULL;
934 }
935
936 while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)
937 emit_alu(&s);
938 }
939
940 if (s.Debug)
941 _mesa_printf(" END\n");
942
943 _mesa_free(s.Instructions);
944 _mesa_free(s.ValuePool);
945 _mesa_free(s.ReaderPool);
946
947 return !s.Error;
948 }
949
950
951 static void print_pair_src(int i, struct radeon_pair_instruction_source* src)
952 {
953 _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index);
954 }
955
956 static const char* opcode_string(GLuint opcode)
957 {
958 if (opcode == OPCODE_REPL_ALPHA)
959 return "SOP";
960 else
961 return _mesa_opcode_string(opcode);
962 }
963
964 static int num_pairinst_args(GLuint opcode)
965 {
966 if (opcode == OPCODE_REPL_ALPHA)
967 return 0;
968 else
969 return _mesa_num_inst_src_regs(opcode);
970 }
971
972 static char swizzle_char(GLuint swz)
973 {
974 switch(swz) {
975 case SWIZZLE_X: return 'x';
976 case SWIZZLE_Y: return 'y';
977 case SWIZZLE_Z: return 'z';
978 case SWIZZLE_W: return 'w';
979 case SWIZZLE_ZERO: return '0';
980 case SWIZZLE_ONE: return '1';
981 case SWIZZLE_NIL: return '_';
982 default: return '?';
983 }
984 }
985
986 void radeonPrintPairInstruction(struct radeon_pair_instruction *inst)
987 {
988 int nargs;
989 int i;
990
991 _mesa_printf(" RGB: ");
992 for(i = 0; i < 3; ++i) {
993 if (inst->RGB.Src[i].Used)
994 print_pair_src(i, inst->RGB.Src + i);
995 }
996 _mesa_printf("\n");
997 _mesa_printf(" Alpha:");
998 for(i = 0; i < 3; ++i) {
999 if (inst->Alpha.Src[i].Used)
1000 print_pair_src(i, inst->Alpha.Src + i);
1001 }
1002 _mesa_printf("\n");
1003
1004 _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : "");
1005 if (inst->RGB.WriteMask)
1006 _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex,
1007 (inst->RGB.WriteMask & 1) ? "x" : "",
1008 (inst->RGB.WriteMask & 2) ? "y" : "",
1009 (inst->RGB.WriteMask & 4) ? "z" : "");
1010 if (inst->RGB.OutputWriteMask)
1011 _mesa_printf(" COLOR.%s%s%s",
1012 (inst->RGB.OutputWriteMask & 1) ? "x" : "",
1013 (inst->RGB.OutputWriteMask & 2) ? "y" : "",
1014 (inst->RGB.OutputWriteMask & 4) ? "z" : "");
1015 nargs = num_pairinst_args(inst->RGB.Opcode);
1016 for(i = 0; i < nargs; ++i) {
1017 const char* abs = inst->RGB.Arg[i].Abs ? "|" : "";
1018 const char* neg = inst->RGB.Arg[i].Negate ? "-" : "";
1019 _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source,
1020 swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)),
1021 swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)),
1022 swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)),
1023 abs);
1024 }
1025 _mesa_printf("\n");
1026
1027 _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : "");
1028 if (inst->Alpha.WriteMask)
1029 _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex);
1030 if (inst->Alpha.OutputWriteMask)
1031 _mesa_printf(" COLOR.w");
1032 if (inst->Alpha.DepthWriteMask)
1033 _mesa_printf(" DEPTH.w");
1034 nargs = num_pairinst_args(inst->Alpha.Opcode);
1035 for(i = 0; i < nargs; ++i) {
1036 const char* abs = inst->Alpha.Arg[i].Abs ? "|" : "";
1037 const char* neg = inst->Alpha.Arg[i].Negate ? "-" : "";
1038 _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source,
1039 swizzle_char(inst->Alpha.Arg[i].Swizzle), abs);
1040 }
1041 _mesa_printf("\n");
1042 }