r300/compiler: Refactor the radeon_pair code to support control flow instructions
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r500_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 */
41
42 #include "r500_fragprog.h"
43
44 #include "../r300_reg.h"
45
46 #include "radeon_program_pair.h"
47
48
/*
 * Declares a local pointer named `code` that refers to the R500 part of the
 * compiler's output code. A compiler pointer named `c` must be in scope
 * wherever this macro is used.
 */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &(c->code->code.r500)
51
/*
 * Report an error through rc_error(). The message is prefixed with the
 * current file and function name and a newline is appended after it.
 * A compiler pointer named `c` must be in scope wherever this is used.
 */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while (0)
56
57
58 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
59 {
60 switch(opcode) {
61 case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
62 case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
63 case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
64 case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
65 case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
66 case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
67 default:
68 error("translate_rgb_op(%d): unknown opcode\n", opcode);
69 /* fall through */
70 case RC_OPCODE_NOP:
71 /* fall through */
72 case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
73 case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
74 case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
75 case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
76 }
77 }
78
79 static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
80 {
81 switch(opcode) {
82 case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
83 case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
84 case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
85 case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
86 case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
87 case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
88 case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
89 case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
90 case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
91 default:
92 error("translate_alpha_op(%d): unknown opcode\n", opcode);
93 /* fall through */
94 case RC_OPCODE_NOP:
95 /* fall through */
96 case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
97 case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
98 case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
99 case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
100 case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
101 case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
102 }
103 }
104
105 static unsigned int fix_hw_swizzle(unsigned int swz)
106 {
107 if (swz == 5) swz = 6;
108 if (swz == RC_SWIZZLE_UNUSED) swz = 4;
109 return swz;
110 }
111
112 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
113 {
114 unsigned int t = inst->RGB.Arg[arg].Source;
115 int comp;
116 t |= inst->RGB.Arg[arg].Negate << 11;
117 t |= inst->RGB.Arg[arg].Abs << 12;
118
119 for(comp = 0; comp < 3; ++comp)
120 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
121
122 return t;
123 }
124
125 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
126 {
127 unsigned int t = inst->Alpha.Arg[i].Source;
128 t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
129 t |= inst->Alpha.Arg[i].Negate << 5;
130 t |= inst->Alpha.Arg[i].Abs << 6;
131 return t;
132 }
133
134 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
135 {
136 if (index > code->max_temp_idx)
137 code->max_temp_idx = index;
138 }
139
140 static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
141 {
142 if (src.File == RC_FILE_CONSTANT) {
143 return src.Index | 0x100;
144 } else if (src.File == RC_FILE_TEMPORARY) {
145 use_temporary(code, src.Index);
146 return src.Index;
147 }
148
149 return 0;
150 }
151
152
153 /**
154 * Emit a paired ALU instruction.
155 */
156 static int emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
157 {
158 PROG_CODE;
159
160 if (code->inst_end >= 511) {
161 error("emit_alu: Too many instructions");
162 return 0;
163 }
164
165 int ip = ++code->inst_end;
166
167 code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
168 code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
169
170 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
171 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
172 else
173 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
174 code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
175
176 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
177 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
178 if (inst->Alpha.DepthWriteMask) {
179 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
180 c->code->writes_depth = 1;
181 }
182
183 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
184 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
185 use_temporary(code, inst->Alpha.DestIndex);
186 use_temporary(code, inst->RGB.DestIndex);
187
188 if (inst->RGB.Saturate)
189 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
190 if (inst->Alpha.Saturate)
191 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
192
193 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
194 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
195 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
196
197 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
198 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
199 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
200
201 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
202 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
203 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
204
205 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
206 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
207 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
208
209 return 1;
210 }
211
/**
 * Pack the four components of \p swizzle into a single value, keeping the
 * low two bits of each component and placing component i at bit 2*i.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan;
	int shift;

	for (chan = 0, shift = 0; chan < 4; ++chan, shift += 2)
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << shift;

	return packed;
}
220
221 /**
222 * Emit a single TEX instruction
223 */
224 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
225 {
226 PROG_CODE;
227
228 if (code->inst_end >= 511) {
229 error("emit_tex: Too many instructions");
230 return 0;
231 }
232
233 int ip = ++code->inst_end;
234
235 code->inst[ip].inst0 = R500_INST_TYPE_TEX
236 | (inst->DstReg.WriteMask << 11)
237 | R500_INST_TEX_SEM_WAIT;
238 code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
239 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
240
241 if (inst->TexSrcTarget == RC_TEXTURE_RECT)
242 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
243
244 switch (inst->Opcode) {
245 case RC_OPCODE_KIL:
246 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
247 break;
248 case RC_OPCODE_TEX:
249 code->inst[ip].inst1 |= R500_TEX_INST_LD;
250 break;
251 case RC_OPCODE_TXB:
252 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
253 break;
254 case RC_OPCODE_TXP:
255 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
256 break;
257 default:
258 error("emit_tex can't handle opcode %x\n", inst->Opcode);
259 }
260
261 use_temporary(code, inst->SrcReg[0].Index);
262 if (inst->Opcode != RC_OPCODE_KIL)
263 use_temporary(code, inst->DstReg.Index);
264
265 code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
266 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
267 | R500_TEX_DST_ADDR(inst->DstReg.Index)
268 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
269 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
270
271 return 1;
272 }
273
274 void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
275 {
276 struct r500_fragment_program_code *code = &compiler->code->code.r500;
277
278 memset(code, 0, sizeof(*code));
279 code->max_temp_idx = 1;
280 code->inst_end = -1;
281
282 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
283 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
284 inst = inst->Next) {
285 if (inst->Type == RC_INSTRUCTION_NORMAL) {
286 if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX)
287 continue;
288
289 emit_tex(compiler, &inst->U.I);
290 } else {
291 emit_paired(compiler, &inst->U.P);
292 }
293 }
294
295 if (code->max_temp_idx >= 128)
296 rc_error(&compiler->Base, "Too many hardware temporaries used");
297
298 if (compiler->Base.Error)
299 return;
300
301 if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
302 /* This may happen when dead-code elimination is disabled or
303 * when most of the fragment program logic is leading to a KIL */
304 if (code->inst_end >= 511) {
305 rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
306 return;
307 }
308
309 int ip = ++code->inst_end;
310 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
311 }
312 }