Merge remote branch 'origin/7.8'
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r500_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 */
41
42 #include "r500_fragprog.h"
43
44 #include "../r300_reg.h"
45
46 #include "radeon_program_pair.h"
47
48
/**
 * Declare a local \c code pointer to the R500 hardware program that is
 * being generated.
 *
 * A variable named \c c (pointer to the fragment program compiler) must be
 * in scope wherever this macro is used.
 */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &c->code->code.r500
51
/**
 * Report a compile error through rc_error().
 *
 * The message is prefixed with the current source file and function name
 * and a newline is appended after the caller's format string.
 *
 * A variable named \c c (pointer to the fragment program compiler) must be
 * in scope wherever this macro is used.
 */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while (0)
56
57
/**
 * Instruction indices recorded for one IF / ELSE / ENDIF construct.
 */
struct branch_info {
	int If;    /* index of the IF instruction */
	int Else;  /* index of the ELSE instruction, or -1 while none was seen */
	int Endif; /* index of the ENDIF instruction, or -1 until it is emitted */
};
63
/**
 * Bookkeeping carried across the emission of one fragment program.
 */
struct emit_state {
	/* Compiler used for error reporting and pool allocations. */
	struct radeon_compiler *C;
	/* Hardware program being written. */
	struct r500_fragment_program_code *Code;

	/* Stack of the branches that are currently open. */
	struct branch_info *Branches;
	/* Number of entries of Branches that are in use. */
	unsigned int CurrentBranchDepth;
	/* Number of entries allocated for Branches. */
	unsigned int BranchesReserved;

	/* Deepest branch nesting encountered so far. */
	unsigned int MaxBranchDepth;
};
74
75 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
76 {
77 switch(opcode) {
78 case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
79 case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
80 case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
81 case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
82 case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
83 case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
84 default:
85 error("translate_rgb_op(%d): unknown opcode\n", opcode);
86 /* fall through */
87 case RC_OPCODE_NOP:
88 /* fall through */
89 case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
90 case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
91 case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
92 case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
93 }
94 }
95
96 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
97 {
98 switch(opcode) {
99 case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
100 case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
101 case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
102 case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
103 case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
104 case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
105 case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
106 case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
107 case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
108 default:
109 error("translate_alpha_op(%d): unknown opcode\n", opcode);
110 /* fall through */
111 case RC_OPCODE_NOP:
112 /* fall through */
113 case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
114 case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
115 case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
116 case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
117 case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
118 case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
119 }
120 }
121
122 static unsigned int fix_hw_swizzle(unsigned int swz)
123 {
124 switch (swz) {
125 case RC_SWIZZLE_ZERO:
126 case RC_SWIZZLE_UNUSED:
127 swz = 4;
128 break;
129 case RC_SWIZZLE_HALF:
130 swz = 5;
131 break;
132 case RC_SWIZZLE_ONE:
133 swz = 6;
134 break;
135 }
136
137 return swz;
138 }
139
140 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
141 {
142 unsigned int t = inst->RGB.Arg[arg].Source;
143 int comp;
144 t |= inst->RGB.Arg[arg].Negate << 11;
145 t |= inst->RGB.Arg[arg].Abs << 12;
146
147 for(comp = 0; comp < 3; ++comp)
148 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
149
150 return t;
151 }
152
153 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
154 {
155 unsigned int t = inst->Alpha.Arg[i].Source;
156 t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
157 t |= inst->Alpha.Arg[i].Negate << 5;
158 t |= inst->Alpha.Arg[i].Abs << 6;
159 return t;
160 }
161
162 static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
163 {
164 switch(func) {
165 case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
166 case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
167 case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
168 case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
169 default:
170 rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
171 return 0;
172 }
173 }
174
175 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
176 {
177 if (index > code->max_temp_idx)
178 code->max_temp_idx = index;
179 }
180
181 static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
182 {
183 if (src.File == RC_FILE_CONSTANT) {
184 return src.Index | 0x100;
185 } else if (src.File == RC_FILE_TEMPORARY) {
186 use_temporary(code, src.Index);
187 return src.Index;
188 }
189
190 return 0;
191 }
192
193 /**
194 * NOP the specified instruction if it is not a texture lookup.
195 */
196 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
197 {
198 PROG_CODE;
199
200 if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
201 code->inst[ip].inst0 |= R500_INST_NOP;
202 }
203 }
204
205 /**
206 * Emit a paired ALU instruction.
207 */
208 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
209 {
210 PROG_CODE;
211
212 if (code->inst_end >= 511) {
213 error("emit_alu: Too many instructions");
214 return;
215 }
216
217 int ip = ++code->inst_end;
218
219 /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
220 if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
221 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
222 if (ip > 0) {
223 alu_nop(c, ip - 1);
224 }
225 }
226
227 code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
228 code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
229
230 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
231 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
232 if (inst->WriteALUResult) {
233 error("%s: cannot write output and ALU result at the same time");
234 return;
235 }
236 } else {
237 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
238 }
239 code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
240
241 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
242 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
243 if (inst->Alpha.DepthWriteMask) {
244 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
245 c->code->writes_depth = 1;
246 }
247
248 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
249 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
250 use_temporary(code, inst->Alpha.DestIndex);
251 use_temporary(code, inst->RGB.DestIndex);
252
253 if (inst->RGB.Saturate)
254 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
255 if (inst->Alpha.Saturate)
256 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
257
258 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
259 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
260 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
261
262 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
263 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
264 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
265
266 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
267 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
268 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
269
270 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
271 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
272 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
273
274 code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
275 code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
276
277 if (inst->WriteALUResult) {
278 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
279
280 if (inst->WriteALUResult == RC_ALURESULT_X)
281 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
282 else
283 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
284
285 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
286 }
287 }
288
/**
 * Pack the four component selectors of \p swizzle into an 8-bit field,
 * two bits per component, with component 0 in the lowest two bits. Only
 * the low two bits of each selector are kept.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan;

	/* Walk from the last component down so that component 0 ends up lowest. */
	for (chan = 3; chan >= 0; --chan)
		packed = (packed << 2) | (GET_SWZ(swizzle, chan) & 0x3);

	return packed;
}
297
298 /**
299 * Emit a single TEX instruction
300 */
301 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
302 {
303 PROG_CODE;
304
305 if (code->inst_end >= 511) {
306 error("emit_tex: Too many instructions");
307 return 0;
308 }
309
310 int ip = ++code->inst_end;
311
312 code->inst[ip].inst0 = R500_INST_TYPE_TEX
313 | (inst->DstReg.WriteMask << 11)
314 | R500_INST_TEX_SEM_WAIT;
315 code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
316 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
317
318 if (inst->TexSrcTarget == RC_TEXTURE_RECT)
319 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
320
321 switch (inst->Opcode) {
322 case RC_OPCODE_KIL:
323 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
324 break;
325 case RC_OPCODE_TEX:
326 code->inst[ip].inst1 |= R500_TEX_INST_LD;
327 break;
328 case RC_OPCODE_TXB:
329 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
330 break;
331 case RC_OPCODE_TXP:
332 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
333 break;
334 default:
335 error("emit_tex can't handle opcode %x\n", inst->Opcode);
336 }
337
338 use_temporary(code, inst->SrcReg[0].Index);
339 if (inst->Opcode != RC_OPCODE_KIL)
340 use_temporary(code, inst->DstReg.Index);
341
342 code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
343 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
344 | R500_TEX_DST_ADDR(inst->DstReg.Index)
345 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
346 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
347
348 return 1;
349 }
350
351 static void grow_branches(struct emit_state * s)
352 {
353 unsigned int newreserved = s->BranchesReserved * 2;
354 struct branch_info * newbranches;
355
356 if (!newreserved)
357 newreserved = 4;
358
359 newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info));
360 memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info));
361
362 s->Branches = newbranches;
363 s->BranchesReserved = newreserved;
364 }
365
366 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
367 {
368 if (s->Code->inst_end >= 511) {
369 rc_error(s->C, "emit_tex: Too many instructions");
370 return;
371 }
372
373 unsigned int newip = ++s->Code->inst_end;
374
375 s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
376
377 if (inst->U.I.Opcode == RC_OPCODE_IF) {
378 if (s->CurrentBranchDepth >= 32) {
379 rc_error(s->C, "Branch depth exceeds hardware limit");
380 return;
381 }
382
383 if (s->CurrentBranchDepth >= s->BranchesReserved)
384 grow_branches(s);
385
386 struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
387 branch->If = newip;
388 branch->Else = -1;
389 branch->Endif = -1;
390
391 if (s->CurrentBranchDepth > s->MaxBranchDepth)
392 s->MaxBranchDepth = s->CurrentBranchDepth;
393
394 /* actual instruction is filled in at ENDIF time */
395 } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) {
396 if (!s->CurrentBranchDepth) {
397 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
398 return;
399 }
400
401 struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
402 branch->Else = newip;
403
404 /* actual instruction is filled in at ENDIF time */
405 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
406 if (!s->CurrentBranchDepth) {
407 rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
408 return;
409 }
410
411 struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
412 branch->Endif = newip;
413
414 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
415 | R500_FC_A_OP_NONE /* no address stack */
416 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
417 | R500_FC_B_OP0_INCR /* increment branch counter if stay */
418 ;
419
420 if (branch->Else >= 0) {
421 /* increment branch counter also if jump */
422 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
423 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
424
425 s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
426 | R500_FC_A_OP_NONE /* no address stack */
427 | R500_FC_B_ELSE /* all active pixels want to jump */
428 | R500_FC_B_OP0_NONE /* no counter op if stay */
429 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
430 | R500_FC_B_POP_CNT(1)
431 ;
432 s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
433 } else {
434 /* don't touch branch counter on jump */
435 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
436 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
437 }
438
439 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
440 | R500_FC_A_OP_NONE /* no address stack */
441 | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
442 | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
443 | R500_FC_B_OP1_NONE /* no branch counter if stay */
444 | R500_FC_B_POP_CNT(1)
445 ;
446 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
447
448 s->CurrentBranchDepth--;
449 } else {
450 rc_error(s->C, "%s: unknown opcode %i\n", __FUNCTION__, inst->U.I.Opcode);
451 }
452 }
453
454 void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
455 {
456 struct emit_state s;
457 struct r500_fragment_program_code *code = &compiler->code->code.r500;
458
459 memset(&s, 0, sizeof(s));
460 s.C = &compiler->Base;
461 s.Code = code;
462
463 memset(code, 0, sizeof(*code));
464 code->max_temp_idx = 1;
465 code->inst_end = -1;
466
467 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
468 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
469 inst = inst->Next) {
470 if (inst->Type == RC_INSTRUCTION_NORMAL) {
471 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
472
473 if (opcode->IsFlowControl) {
474 emit_flowcontrol(&s, inst);
475 } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
476 continue;
477 } else {
478 emit_tex(compiler, &inst->U.I);
479 }
480 } else {
481 emit_paired(compiler, &inst->U.P);
482 }
483 }
484
485 if (code->max_temp_idx >= 128)
486 rc_error(&compiler->Base, "Too many hardware temporaries used");
487
488 if (compiler->Base.Error)
489 return;
490
491 if (code->inst_end == -1 ||
492 (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
493 /* This may happen when dead-code elimination is disabled or
494 * when most of the fragment program logic is leading to a KIL */
495 if (code->inst_end >= 511) {
496 rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
497 return;
498 }
499
500 int ip = ++code->inst_end;
501 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
502 }
503
504 if (s.MaxBranchDepth >= 4) {
505 if (code->max_temp_idx < 1)
506 code->max_temp_idx = 1;
507
508 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
509 }
510 }