r300: Add radeon_compiler as a base for compilation-related tasks
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_program_pair.c
1 /*
2 * Copyright (C) 2008 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * @file
30 *
31 * Perform temporary register allocation and attempt to pair off instructions
32 * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
33 * vs. ALU instruction scheduling.
34 */
35
36 #include "radeon_program_pair.h"
37
38 #include "memory_pool.h"
39 #include "radeon_compiler.h"
40 #include "shader/prog_print.h"
41
/**
 * Report a driver-internal problem on stderr, prefixed with the current file
 * and function name, and mark the current pairing run as failed.
 *
 * The macro expects a pointer named `s` with an `Error` member to be in scope
 * at the point of use. A newline is appended to the message automatically.
 */
#define error(fmt, ...) do { \
		fprintf(stderr, "r300 driver problem: %s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
		s->Error = GL_TRUE; \
	} while(0)
47
/**
 * Per-instruction scheduling record.
 *
 * One record is created by scan_instructions for every instruction of the
 * input program. It holds a private, already rewritten copy of the
 * instruction plus the bookkeeping needed to decide when it may be emitted.
 */
struct pair_state_instruction {
	struct prog_instruction Instruction; /**< Private copy of the instruction, after final_rewrite */
	GLuint IP; /**< Position of this instruction in original program */

	GLuint IsTex:1; /**< Is a texture instruction */
	GLuint NeedRGB:1; /**< Needs the RGB ALU */
	GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
	GLuint IsTranscendent:1; /**< Is a special transcendent instruction */

	/**
	 * Number of (read and write) dependencies that must be resolved before
	 * this instruction can be scheduled. The instruction is linked into a
	 * ready list once this count is zero.
	 */
	GLuint NumDependencies:5;

	/**
	 * Next instruction in the linked list of ready instructions.
	 */
	struct pair_state_instruction *NextReady;

	/**
	 * Values that this instruction writes, one slot per component.
	 * Only filled in for written components of a PROGRAM_TEMPORARY
	 * destination.
	 */
	struct reg_value *Values[4];
};
73
74
/**
 * Used to keep track of which instructions read a value.
 *
 * Each node names one reading instruction; nodes are chained through Next
 * to form the reader list hanging off a struct reg_value.
 */
struct reg_value_reader {
	struct pair_state_instruction *Reader; /**< Instruction that reads the value */
	struct reg_value_reader *Next; /**< Next reader of the same value, or null */
};
82
/**
 * Used to keep track of which values are stored in each component of a
 * PROGRAM_TEMPORARY.
 */
struct reg_value {
	struct pair_state_instruction *Writer; /**< Instruction that produces this value */
	struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */

	/**
	 * Unordered linked list of instructions that read from this value.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is calculated during @ref scan_instructions
	 * and continually decremented during code emission.
	 * When this count reaches zero, the instruction that writes the @ref Next value
	 * can be scheduled.
	 */
	GLuint NumReaders;
};
104
/**
 * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
 * to the proper hardware temporary.
 */
struct pair_register_translation {
	GLuint Allocated:1; /**< Set once a hardware temporary has been assigned */
	GLuint HwIndex:8; /**< Index of the assigned hardware temporary (meaningful when Allocated) */
	GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */

	/**
	 * Notes the value that is currently contained in each component
	 * (only used for PROGRAM_TEMPORARY registers).
	 */
	struct reg_value *Value[4];
};
120
/**
 * Complete state of one pairing/scheduling run.
 */
struct pair_state {
	struct radeon_compiler * Compiler; /**< Provides the memory pool and the Debug flag used here */
	struct gl_program *Program; /**< Program being scheduled */
	const struct radeon_pair_handler *Handler; /**< Callbacks that receive the emitted code */
	GLboolean Error; /**< Set as soon as any step fails */
	GLboolean Verbose; /**< Enables the extra scheduler trace output */
	void *UserData; /**< Opaque pointer handed back to every Handler callback */

	/**
	 * Translate Mesa registers to hardware registers
	 */
	struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
	struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];

	/**
	 * Per hardware temporary reference counts; a count of zero marks the
	 * hardware temporary as free for allocation.
	 */
	struct {
		GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
	} HwTemps[128];

	/**
	 * Linked list of instructions that can be scheduled right now,
	 * based on which ALU/TEX resources they require.
	 */
	struct pair_state_instruction *ReadyFullALU;
	struct pair_state_instruction *ReadyRGB;
	struct pair_state_instruction *ReadyAlpha;
	struct pair_state_instruction *ReadyTEX;
};
148
149
150 static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index)
151 {
152 switch(file) {
153 case PROGRAM_TEMPORARY: return &s->Temps[index];
154 case PROGRAM_INPUT: return &s->Inputs[index];
155 default: return 0;
156 }
157 }
158
159 static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex)
160 {
161 struct pair_register_translation *t = get_register(s, file, index);
162 ASSERT(!s->HwTemps[hwindex].RefCount);
163 ASSERT(!t->Allocated);
164 s->HwTemps[hwindex].RefCount = t->RefCount;
165 t->Allocated = 1;
166 t->HwIndex = hwindex;
167 }
168
169 static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
170 {
171 GLuint hwindex;
172
173 struct pair_register_translation *t = get_register(s, file, index);
174 if (!t) {
175 error("get_hw_reg: %i[%i]\n", file, index);
176 return 0;
177 }
178
179 if (t->Allocated)
180 return t->HwIndex;
181
182 for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex)
183 if (!s->HwTemps[hwindex].RefCount)
184 break;
185
186 if (hwindex >= s->Handler->MaxHwTemps) {
187 error("Ran out of hardware temporaries");
188 return 0;
189 }
190
191 alloc_hw_reg(s, file, index, hwindex);
192 return hwindex;
193 }
194
195
196 static void deref_hw_reg(struct pair_state *s, GLuint hwindex)
197 {
198 if (!s->HwTemps[hwindex].RefCount) {
199 error("Hwindex %i refcount error", hwindex);
200 return;
201 }
202
203 s->HwTemps[hwindex].RefCount--;
204 }
205
206 static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst)
207 {
208 pairinst->NextReady = *list;
209 *list = pairinst;
210 }
211
212 /**
213 * The given instruction has become ready. Link it into the ready
214 * instructions.
215 */
216 static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst)
217 {
218 if (s->Verbose)
219 _mesa_printf("instruction_ready(%i)\n", pairinst->IP);
220
221 if (pairinst->IsTex)
222 add_pairinst_to_list(&s->ReadyTEX, pairinst);
223 else if (!pairinst->NeedAlpha)
224 add_pairinst_to_list(&s->ReadyRGB, pairinst);
225 else if (!pairinst->NeedRGB)
226 add_pairinst_to_list(&s->ReadyAlpha, pairinst);
227 else
228 add_pairinst_to_list(&s->ReadyFullALU, pairinst);
229 }
230
231
232 /**
233 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
234 * and reverse the order of arguments for CMP.
235 */
236 static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
237 {
238 struct prog_src_register tmp;
239
240 switch(inst->Opcode) {
241 case OPCODE_ADD:
242 inst->SrcReg[2] = inst->SrcReg[1];
243 inst->SrcReg[1].File = PROGRAM_BUILTIN;
244 inst->SrcReg[1].Swizzle = SWIZZLE_1111;
245 inst->SrcReg[1].Negate = NEGATE_NONE;
246 inst->Opcode = OPCODE_MAD;
247 break;
248 case OPCODE_CMP:
249 tmp = inst->SrcReg[2];
250 inst->SrcReg[2] = inst->SrcReg[0];
251 inst->SrcReg[0] = tmp;
252 break;
253 case OPCODE_MOV:
254 /* AMD say we should use CMP.
255 * However, when we transform
256 * KIL -r0;
257 * into
258 * CMP tmp, -r0, -r0, 0;
259 * KIL tmp;
260 * we get incorrect behaviour on R500 when r0 == 0.0.
261 * It appears that the R500 KIL hardware treats -0.0 as less
262 * than zero.
263 */
264 inst->SrcReg[1].File = PROGRAM_BUILTIN;
265 inst->SrcReg[1].Swizzle = SWIZZLE_1111;
266 inst->SrcReg[2].File = PROGRAM_BUILTIN;
267 inst->SrcReg[2].Swizzle = SWIZZLE_0000;
268 inst->Opcode = OPCODE_MAD;
269 break;
270 case OPCODE_MUL:
271 inst->SrcReg[2].File = PROGRAM_BUILTIN;
272 inst->SrcReg[2].Swizzle = SWIZZLE_0000;
273 inst->Opcode = OPCODE_MAD;
274 break;
275 default:
276 /* nothing to do */
277 break;
278 }
279 }
280
281
282 /**
283 * Classify an instruction according to which ALUs etc. it needs
284 */
285 static void classify_instruction(struct pair_state *s,
286 struct pair_state_instruction *psi)
287 {
288 psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
289 psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
290
291 switch(psi->Instruction.Opcode) {
292 case OPCODE_ADD:
293 case OPCODE_CMP:
294 case OPCODE_DDX:
295 case OPCODE_DDY:
296 case OPCODE_FRC:
297 case OPCODE_MAD:
298 case OPCODE_MAX:
299 case OPCODE_MIN:
300 case OPCODE_MOV:
301 case OPCODE_MUL:
302 break;
303 case OPCODE_COS:
304 case OPCODE_EX2:
305 case OPCODE_LG2:
306 case OPCODE_RCP:
307 case OPCODE_RSQ:
308 case OPCODE_SIN:
309 psi->IsTranscendent = 1;
310 psi->NeedAlpha = 1;
311 break;
312 case OPCODE_DP4:
313 psi->NeedAlpha = 1;
314 /* fall through */
315 case OPCODE_DP3:
316 psi->NeedRGB = 1;
317 break;
318 case OPCODE_KIL:
319 case OPCODE_TEX:
320 case OPCODE_TXB:
321 case OPCODE_TXP:
322 case OPCODE_END:
323 psi->IsTex = 1;
324 break;
325 default:
326 error("Unknown opcode %d\n", psi->Instruction.Opcode);
327 break;
328 }
329 }
330
331
332 /**
333 * Count which (input, temporary) register is read and written how often,
334 * and scan the instruction stream to find dependencies.
335 */
336 static void scan_instructions(struct pair_state *s)
337 {
338 struct prog_instruction *source;
339 GLuint ip;
340
341 for(source = s->Program->Instructions, ip = 0;
342 source->Opcode != OPCODE_END;
343 ++source, ++ip) {
344 struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Pool, sizeof(*pairinst));
345 memset(pairinst, 0, sizeof(struct pair_state_instruction));
346
347 pairinst->Instruction = *source;
348 pairinst->IP = ip;
349 final_rewrite(s, &pairinst->Instruction);
350 classify_instruction(s, pairinst);
351
352 int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode);
353 int j;
354 for(j = 0; j < nsrc; j++) {
355 struct pair_register_translation *t =
356 get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index);
357 if (!t)
358 continue;
359
360 t->RefCount++;
361
362 if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) {
363 int i;
364 for(i = 0; i < 4; ++i) {
365 GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i);
366 if (swz >= 4)
367 continue; /* constant or NIL swizzle */
368 if (!t->Value[swz])
369 continue; /* this is an undefined read */
370
371 /* Do not add a dependency if this instruction
372 * also rewrites the value. The code below adds
373 * a dependency for the DstReg, which is a superset
374 * of the SrcReg dependency. */
375 if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY &&
376 pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index &&
377 GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz))
378 continue;
379
380 struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Pool, sizeof(*r));
381 pairinst->NumDependencies++;
382 t->Value[swz]->NumReaders++;
383 r->Reader = pairinst;
384 r->Next = t->Value[swz]->Readers;
385 t->Value[swz]->Readers = r;
386 }
387 }
388 }
389
390 int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode);
391 if (ndst) {
392 struct pair_register_translation *t =
393 get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index);
394 if (t) {
395 t->RefCount++;
396
397 if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) {
398 int j;
399 for(j = 0; j < 4; ++j) {
400 if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j))
401 continue;
402
403 struct reg_value* v = memory_pool_malloc(&s->Compiler->Pool, sizeof(*v));
404 memset(v, 0, sizeof(struct reg_value));
405 v->Writer = pairinst;
406 if (t->Value[j]) {
407 pairinst->NumDependencies++;
408 t->Value[j]->Next = v;
409 }
410 t->Value[j] = v;
411 pairinst->Values[j] = v;
412 }
413 }
414 }
415 }
416
417 if (s->Verbose)
418 _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);
419
420 if (!pairinst->NumDependencies)
421 instruction_ready(s, pairinst);
422 }
423
424 /* Clear the PROGRAM_TEMPORARY state */
425 int i, j;
426 for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
427 for(j = 0; j < 4; ++j)
428 s->Temps[i].Value[j] = 0;
429 }
430 }
431
432
433 /**
434 * Reserve hardware temporary registers for the program inputs.
435 *
436 * @note This allocation is performed explicitly, because the order of inputs
437 * is determined by the RS hardware.
438 */
439 static void allocate_input_registers(struct pair_state *s)
440 {
441 GLuint InputsRead = s->Program->InputsRead;
442 int i;
443 GLuint hwindex = 0;
444
445 /* Primary colour */
446 if (InputsRead & FRAG_BIT_COL0)
447 alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
448 InputsRead &= ~FRAG_BIT_COL0;
449
450 /* Secondary color */
451 if (InputsRead & FRAG_BIT_COL1)
452 alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
453 InputsRead &= ~FRAG_BIT_COL1;
454
455 /* Texcoords */
456 for (i = 0; i < 8; i++) {
457 if (InputsRead & (FRAG_BIT_TEX0 << i))
458 alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
459 }
460 InputsRead &= ~FRAG_BITS_TEX_ANY;
461
462 /* Fogcoords treated as a texcoord */
463 if (InputsRead & FRAG_BIT_FOGC)
464 alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++);
465 InputsRead &= ~FRAG_BIT_FOGC;
466
467 /* fragment position treated as a texcoord */
468 if (InputsRead & FRAG_BIT_WPOS)
469 alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
470 InputsRead &= ~FRAG_BIT_WPOS;
471
472 /* Anything else */
473 if (InputsRead)
474 error("Don't know how to handle inputs 0x%x\n", InputsRead);
475 }
476
477
478 static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst)
479 {
480 ASSERT(pairinst->NumDependencies > 0);
481 if (!--pairinst->NumDependencies)
482 instruction_ready(s, pairinst);
483 }
484
485 /**
486 * Update the dependency tracking state based on what the instruction
487 * at the given IP does.
488 */
489 static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst)
490 {
491 struct prog_instruction *inst = &pairinst->Instruction;
492
493 if (s->Verbose)
494 _mesa_printf("commit_instruction(%i)\n", pairinst->IP);
495
496 if (inst->DstReg.File == PROGRAM_TEMPORARY) {
497 struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
498 deref_hw_reg(s, t->HwIndex);
499
500 int i;
501 for(i = 0; i < 4; ++i) {
502 if (!GET_BIT(inst->DstReg.WriteMask, i))
503 continue;
504
505 t->Value[i] = pairinst->Values[i];
506 if (t->Value[i]->NumReaders) {
507 struct reg_value_reader *r;
508 for(r = pairinst->Values[i]->Readers; r; r = r->Next)
509 decrement_dependencies(s, r->Reader);
510 } else if (t->Value[i]->Next) {
511 /* This happens when the only reader writes
512 * the register at the same time */
513 decrement_dependencies(s, t->Value[i]->Next->Writer);
514 }
515 }
516 }
517
518 int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
519 int i;
520 for(i = 0; i < nsrc; i++) {
521 struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index);
522 if (!t)
523 continue;
524
525 deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index));
526
527 if (inst->SrcReg[i].File != PROGRAM_TEMPORARY)
528 continue;
529
530 int j;
531 for(j = 0; j < 4; ++j) {
532 GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
533 if (swz >= 4)
534 continue;
535 if (!t->Value[swz])
536 continue;
537
538 /* Do not free a dependency if this instruction
539 * also rewrites the value. See scan_instructions. */
540 if (inst->DstReg.File == PROGRAM_TEMPORARY &&
541 inst->DstReg.Index == inst->SrcReg[i].Index &&
542 GET_BIT(inst->DstReg.WriteMask, swz))
543 continue;
544
545 if (!--t->Value[swz]->NumReaders) {
546 if (t->Value[swz]->Next)
547 decrement_dependencies(s, t->Value[swz]->Next->Writer);
548 }
549 }
550 }
551 }
552
553
554 /**
555 * Emit all ready texture instructions in a single block.
556 *
557 * Emit as a single block to (hopefully) sample many textures in parallel,
558 * and to avoid hardware indirections on R300.
559 *
560 * In R500, we don't really know when the result of a texture instruction
561 * arrives. So allocate all destinations first, to make sure they do not
562 * arrive early and overwrite a texture coordinate we're going to use later
563 * in the block.
564 */
565 static void emit_all_tex(struct pair_state *s)
566 {
567 struct pair_state_instruction *readytex;
568 struct pair_state_instruction *pairinst;
569
570 ASSERT(s->ReadyTEX);
571
572 // Don't let the ready list change under us!
573 readytex = s->ReadyTEX;
574 s->ReadyTEX = 0;
575
576 // Allocate destination hardware registers in one block to avoid conflicts.
577 for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
578 struct prog_instruction *inst = &pairinst->Instruction;
579 if (inst->Opcode != OPCODE_KIL)
580 get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
581 }
582
583 if (s->Compiler->Debug)
584 _mesa_printf(" BEGIN_TEX\n");
585
586 if (s->Handler->BeginTexBlock)
587 s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData);
588
589 for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
590 struct prog_instruction *inst = &pairinst->Instruction;
591 commit_instruction(s, pairinst);
592
593 if (inst->Opcode != OPCODE_KIL)
594 inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
595 inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
596
597 if (s->Compiler->Debug) {
598 _mesa_printf(" ");
599 _mesa_print_instruction(inst);
600 fflush(stdout);
601 }
602
603 struct radeon_pair_texture_instruction rpti;
604
605 switch(inst->Opcode) {
606 case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break;
607 case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break;
608 case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break;
609 default:
610 case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break;
611 }
612
613 rpti.DestIndex = inst->DstReg.Index;
614 rpti.WriteMask = inst->DstReg.WriteMask;
615 rpti.TexSrcUnit = inst->TexSrcUnit;
616 rpti.TexSrcTarget = inst->TexSrcTarget;
617 rpti.SrcIndex = inst->SrcReg[0].Index;
618 rpti.SrcSwizzle = inst->SrcReg[0].Swizzle;
619
620 s->Error = s->Error || !s->Handler->EmitTex(s->UserData, &rpti);
621 }
622
623 if (s->Compiler->Debug)
624 _mesa_printf(" END_TEX\n");
625 }
626
627
628 static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair,
629 struct prog_src_register src, GLboolean rgb, GLboolean alpha)
630 {
631 int candidate = -1;
632 int candidate_quality = -1;
633 int i;
634
635 if (!rgb && !alpha)
636 return 0;
637
638 GLuint constant;
639 GLuint index;
640
641 if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) {
642 constant = 0;
643 index = get_hw_reg(s, src.File, src.Index);
644 } else {
645 constant = 1;
646 s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index);
647 }
648
649 for(i = 0; i < 3; ++i) {
650 int q = 0;
651 if (rgb) {
652 if (pair->RGB.Src[i].Used) {
653 if (pair->RGB.Src[i].Constant != constant ||
654 pair->RGB.Src[i].Index != index)
655 continue;
656 q++;
657 }
658 }
659 if (alpha) {
660 if (pair->Alpha.Src[i].Used) {
661 if (pair->Alpha.Src[i].Constant != constant ||
662 pair->Alpha.Src[i].Index != index)
663 continue;
664 q++;
665 }
666 }
667 if (q > candidate_quality) {
668 candidate_quality = q;
669 candidate = i;
670 }
671 }
672
673 if (candidate >= 0) {
674 if (rgb) {
675 pair->RGB.Src[candidate].Used = 1;
676 pair->RGB.Src[candidate].Constant = constant;
677 pair->RGB.Src[candidate].Index = index;
678 }
679 if (alpha) {
680 pair->Alpha.Src[candidate].Used = 1;
681 pair->Alpha.Src[candidate].Constant = constant;
682 pair->Alpha.Src[candidate].Index = index;
683 }
684 }
685
686 return candidate;
687 }
688
689 /**
690 * Fill the given ALU instruction's opcodes and source operands into the given pair,
691 * if possible.
692 */
693 static GLboolean fill_instruction_into_pair(
694 struct pair_state *s,
695 struct radeon_pair_instruction *pair,
696 struct pair_state_instruction *pairinst)
697 {
698 struct prog_instruction *inst = &pairinst->Instruction;
699
700 ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
701 ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);
702
703 if (pairinst->NeedRGB) {
704 if (pairinst->IsTranscendent)
705 pair->RGB.Opcode = OPCODE_REPL_ALPHA;
706 else
707 pair->RGB.Opcode = inst->Opcode;
708 if (inst->SaturateMode == SATURATE_ZERO_ONE)
709 pair->RGB.Saturate = 1;
710 }
711 if (pairinst->NeedAlpha) {
712 pair->Alpha.Opcode = inst->Opcode;
713 if (inst->SaturateMode == SATURATE_ZERO_ONE)
714 pair->Alpha.Saturate = 1;
715 }
716
717 int nargs = _mesa_num_inst_src_regs(inst->Opcode);
718 int i;
719
720 /* Special case for DDX/DDY (MDH/MDV). */
721 if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) {
722 if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used)
723 return GL_FALSE;
724 else
725 nargs++;
726 }
727
728 for(i = 0; i < nargs; ++i) {
729 int source;
730 if (pairinst->NeedRGB && !pairinst->IsTranscendent) {
731 GLboolean srcrgb = GL_FALSE;
732 GLboolean srcalpha = GL_FALSE;
733 int j;
734 for(j = 0; j < 3; ++j) {
735 GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
736 if (swz < 3)
737 srcrgb = GL_TRUE;
738 else if (swz < 4)
739 srcalpha = GL_TRUE;
740 }
741 source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
742 if (source < 0)
743 return GL_FALSE;
744 pair->RGB.Arg[i].Source = source;
745 pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
746 pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
747 pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z));
748 }
749 if (pairinst->NeedAlpha) {
750 GLboolean srcrgb = GL_FALSE;
751 GLboolean srcalpha = GL_FALSE;
752 GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3);
753 if (swz < 3)
754 srcrgb = GL_TRUE;
755 else if (swz < 4)
756 srcalpha = GL_TRUE;
757 source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
758 if (source < 0)
759 return GL_FALSE;
760 pair->Alpha.Arg[i].Source = source;
761 pair->Alpha.Arg[i].Swizzle = swz;
762 pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
763 pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W);
764 }
765 }
766
767 return GL_TRUE;
768 }
769
770
771 /**
772 * Fill in the destination register information.
773 *
774 * This is split from filling in source registers because we want
775 * to avoid allocating hardware temporaries for destinations until
776 * we are absolutely certain that we're going to emit a certain
777 * instruction pairing.
778 */
779 static void fill_dest_into_pair(
780 struct pair_state *s,
781 struct radeon_pair_instruction *pair,
782 struct pair_state_instruction *pairinst)
783 {
784 struct prog_instruction *inst = &pairinst->Instruction;
785
786 if (inst->DstReg.File == PROGRAM_OUTPUT) {
787 if (inst->DstReg.Index == FRAG_RESULT_COLOR) {
788 pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
789 pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
790 } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) {
791 pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
792 }
793 } else {
794 GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
795 if (pairinst->NeedRGB) {
796 pair->RGB.DestIndex = hwindex;
797 pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
798 }
799 if (pairinst->NeedAlpha) {
800 pair->Alpha.DestIndex = hwindex;
801 pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
802 }
803 }
804 }
805
806
807 /**
808 * Find a good ALU instruction or pair of ALU instruction and emit it.
809 *
810 * Prefer emitting full ALU instructions, so that when we reach a point
811 * where no full ALU instruction can be emitted, we have more candidates
812 * for RGB/Alpha pairing.
813 */
814 static void emit_alu(struct pair_state *s)
815 {
816 struct radeon_pair_instruction pair;
817 struct pair_state_instruction *psi;
818
819 if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
820 if (s->ReadyFullALU) {
821 psi = s->ReadyFullALU;
822 s->ReadyFullALU = s->ReadyFullALU->NextReady;
823 } else if (s->ReadyRGB) {
824 psi = s->ReadyRGB;
825 s->ReadyRGB = s->ReadyRGB->NextReady;
826 } else {
827 psi = s->ReadyAlpha;
828 s->ReadyAlpha = s->ReadyAlpha->NextReady;
829 }
830
831 _mesa_bzero(&pair, sizeof(pair));
832 fill_instruction_into_pair(s, &pair, psi);
833 fill_dest_into_pair(s, &pair, psi);
834 commit_instruction(s, psi);
835 } else {
836 struct pair_state_instruction **prgb;
837 struct pair_state_instruction **palpha;
838
839 /* Some pairings might fail because they require too
840 * many source slots; try all possible pairings if necessary */
841 for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
842 for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
843 struct pair_state_instruction * psirgb = *prgb;
844 struct pair_state_instruction * psialpha = *palpha;
845 _mesa_bzero(&pair, sizeof(pair));
846 fill_instruction_into_pair(s, &pair, psirgb);
847 if (!fill_instruction_into_pair(s, &pair, psialpha))
848 continue;
849 *prgb = (*prgb)->NextReady;
850 *palpha = (*palpha)->NextReady;
851 fill_dest_into_pair(s, &pair, psirgb);
852 fill_dest_into_pair(s, &pair, psialpha);
853 commit_instruction(s, psirgb);
854 commit_instruction(s, psialpha);
855 goto success;
856 }
857 }
858
859 /* No success in pairing; just take the first RGB instruction */
860 psi = s->ReadyRGB;
861 s->ReadyRGB = s->ReadyRGB->NextReady;
862
863 _mesa_bzero(&pair, sizeof(pair));
864 fill_instruction_into_pair(s, &pair, psi);
865 fill_dest_into_pair(s, &pair, psi);
866 commit_instruction(s, psi);
867 success: ;
868 }
869
870 if (s->Compiler->Debug)
871 radeonPrintPairInstruction(&pair);
872
873 s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);
874 }
875
876
877 GLboolean radeonPairProgram(
878 struct radeon_compiler * compiler,
879 struct gl_program *program,
880 const struct radeon_pair_handler* handler, void *userdata)
881 {
882 struct pair_state s;
883
884 _mesa_bzero(&s, sizeof(s));
885 s.Compiler = compiler;
886 s.Program = program;
887 s.Handler = handler;
888 s.UserData = userdata;
889 s.Verbose = GL_FALSE && s.Compiler->Debug;
890
891 if (s.Compiler->Debug)
892 _mesa_printf("Emit paired program\n");
893
894 scan_instructions(&s);
895 allocate_input_registers(&s);
896
897 while(!s.Error &&
898 (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
899 if (s.ReadyTEX)
900 emit_all_tex(&s);
901
902 while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)
903 emit_alu(&s);
904 }
905
906 if (s.Compiler->Debug)
907 _mesa_printf(" END\n");
908
909 return !s.Error;
910 }
911
912
913 static void print_pair_src(int i, struct radeon_pair_instruction_source* src)
914 {
915 _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index);
916 }
917
918 static const char* opcode_string(GLuint opcode)
919 {
920 if (opcode == OPCODE_REPL_ALPHA)
921 return "SOP";
922 else
923 return _mesa_opcode_string(opcode);
924 }
925
926 static int num_pairinst_args(GLuint opcode)
927 {
928 if (opcode == OPCODE_REPL_ALPHA)
929 return 0;
930 else
931 return _mesa_num_inst_src_regs(opcode);
932 }
933
934 static char swizzle_char(GLuint swz)
935 {
936 switch(swz) {
937 case SWIZZLE_X: return 'x';
938 case SWIZZLE_Y: return 'y';
939 case SWIZZLE_Z: return 'z';
940 case SWIZZLE_W: return 'w';
941 case SWIZZLE_ZERO: return '0';
942 case SWIZZLE_ONE: return '1';
943 case SWIZZLE_NIL: return '_';
944 default: return '?';
945 }
946 }
947
948 void radeonPrintPairInstruction(struct radeon_pair_instruction *inst)
949 {
950 int nargs;
951 int i;
952
953 _mesa_printf(" RGB: ");
954 for(i = 0; i < 3; ++i) {
955 if (inst->RGB.Src[i].Used)
956 print_pair_src(i, inst->RGB.Src + i);
957 }
958 _mesa_printf("\n");
959 _mesa_printf(" Alpha:");
960 for(i = 0; i < 3; ++i) {
961 if (inst->Alpha.Src[i].Used)
962 print_pair_src(i, inst->Alpha.Src + i);
963 }
964 _mesa_printf("\n");
965
966 _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : "");
967 if (inst->RGB.WriteMask)
968 _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex,
969 (inst->RGB.WriteMask & 1) ? "x" : "",
970 (inst->RGB.WriteMask & 2) ? "y" : "",
971 (inst->RGB.WriteMask & 4) ? "z" : "");
972 if (inst->RGB.OutputWriteMask)
973 _mesa_printf(" COLOR.%s%s%s",
974 (inst->RGB.OutputWriteMask & 1) ? "x" : "",
975 (inst->RGB.OutputWriteMask & 2) ? "y" : "",
976 (inst->RGB.OutputWriteMask & 4) ? "z" : "");
977 nargs = num_pairinst_args(inst->RGB.Opcode);
978 for(i = 0; i < nargs; ++i) {
979 const char* abs = inst->RGB.Arg[i].Abs ? "|" : "";
980 const char* neg = inst->RGB.Arg[i].Negate ? "-" : "";
981 _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source,
982 swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)),
983 swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)),
984 swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)),
985 abs);
986 }
987 _mesa_printf("\n");
988
989 _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : "");
990 if (inst->Alpha.WriteMask)
991 _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex);
992 if (inst->Alpha.OutputWriteMask)
993 _mesa_printf(" COLOR.w");
994 if (inst->Alpha.DepthWriteMask)
995 _mesa_printf(" DEPTH.w");
996 nargs = num_pairinst_args(inst->Alpha.Opcode);
997 for(i = 0; i < nargs; ++i) {
998 const char* abs = inst->Alpha.Arg[i].Abs ? "|" : "";
999 const char* neg = inst->Alpha.Arg[i].Negate ? "-" : "";
1000 _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source,
1001 swizzle_char(inst->Alpha.Arg[i].Swizzle), abs);
1002 }
1003 _mesa_printf("\n");
1004 }