Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r300 / r500_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 * \todo Depth write, WPOS/FOGC inputs
41 *
42 * \todo FogOption
43 *
44 */
45
46 #include "r500_fragprog.h"
47
48 #include "radeon_program_pair.h"
49
50
/*
 * Declares the two locals shared by the emit callbacks:
 *   c    - the fragment program compiler, recovered from the opaque
 *          `data` pointer the callback received;
 *   code - the R500 program code stored inside that compiler.
 * Must be expanded where a `void *data` is in scope.
 */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = data; \
	struct r500_fragment_program_code *code = &c->code->r500
54
/*
 * Print a printf-style diagnostic to stderr, prefixed with the current
 * file and function name and terminated with a newline.
 */
#define error(fmt, ...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __func__, ##__VA_ARGS__); \
	} while(0)
59
60
61 /**
62 * Callback to register hardware constants.
63 */
64 static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex)
65 {
66 PROG_CODE;
67
68 for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
69 if (code->constant[*hwindex].File == file &&
70 code->constant[*hwindex].Index == idx)
71 break;
72 }
73
74 if (*hwindex >= code->const_nr) {
75 if (*hwindex >= R500_PFS_NUM_CONST_REGS) {
76 error("Out of hw constants!\n");
77 return GL_FALSE;
78 }
79
80 code->const_nr++;
81 code->constant[*hwindex].File = file;
82 code->constant[*hwindex].Index = idx;
83 }
84
85 return GL_TRUE;
86 }
87
88 static GLuint translate_rgb_op(GLuint opcode)
89 {
90 switch(opcode) {
91 case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
92 case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
93 case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
94 case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
95 case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
96 case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
97 default:
98 error("translate_rgb_op(%d): unknown opcode\n", opcode);
99 /* fall through */
100 case OPCODE_NOP:
101 /* fall through */
102 case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
103 case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
104 case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
105 case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
106 }
107 }
108
109 static GLuint translate_alpha_op(GLuint opcode)
110 {
111 switch(opcode) {
112 case OPCODE_CMP: return R500_ALPHA_OP_CMP;
113 case OPCODE_COS: return R500_ALPHA_OP_COS;
114 case OPCODE_DDX: return R500_ALPHA_OP_MDH;
115 case OPCODE_DDY: return R500_ALPHA_OP_MDV;
116 case OPCODE_DP3: return R500_ALPHA_OP_DP;
117 case OPCODE_DP4: return R500_ALPHA_OP_DP;
118 case OPCODE_EX2: return R500_ALPHA_OP_EX2;
119 case OPCODE_FRC: return R500_ALPHA_OP_FRC;
120 case OPCODE_LG2: return R500_ALPHA_OP_LN2;
121 default:
122 error("translate_alpha_op(%d): unknown opcode\n", opcode);
123 /* fall through */
124 case OPCODE_NOP:
125 /* fall through */
126 case OPCODE_MAD: return R500_ALPHA_OP_MAD;
127 case OPCODE_MAX: return R500_ALPHA_OP_MAX;
128 case OPCODE_MIN: return R500_ALPHA_OP_MIN;
129 case OPCODE_RCP: return R500_ALPHA_OP_RCP;
130 case OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
131 case OPCODE_SIN: return R500_ALPHA_OP_SIN;
132 }
133 }
134
135 static GLuint fix_hw_swizzle(GLuint swz)
136 {
137 if (swz == 5) swz = 6;
138 if (swz == SWIZZLE_NIL) swz = 4;
139 return swz;
140 }
141
142 static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
143 {
144 GLuint t = inst->RGB.Arg[arg].Source;
145 int comp;
146 t |= inst->RGB.Arg[arg].Negate << 11;
147 t |= inst->RGB.Arg[arg].Abs << 12;
148
149 for(comp = 0; comp < 3; ++comp)
150 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
151
152 return t;
153 }
154
155 static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i)
156 {
157 GLuint t = inst->Alpha.Arg[i].Source;
158 t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
159 t |= inst->Alpha.Arg[i].Negate << 5;
160 t |= inst->Alpha.Arg[i].Abs << 6;
161 return t;
162 }
163
164 static void use_temporary(struct r500_fragment_program_code* code, GLuint index)
165 {
166 if (index > code->max_temp_idx)
167 code->max_temp_idx = index;
168 }
169
170 static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
171 {
172 if (!src.Constant)
173 use_temporary(code, src.Index);
174 return src.Index | src.Constant << 8;
175 }
176
177
178 /**
179 * Emit a paired ALU instruction.
180 */
181 static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
182 {
183 PROG_CODE;
184
185 if (code->inst_end >= 511) {
186 error("emit_alu: Too many instructions");
187 return GL_FALSE;
188 }
189
190 int ip = ++code->inst_end;
191
192 code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode);
193 code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode);
194
195 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
196 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
197 else
198 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
199 code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
200
201 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
202 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
203 if (inst->Alpha.DepthWriteMask) {
204 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
205 c->fp->writes_depth = GL_TRUE;
206 }
207
208 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
209 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
210 use_temporary(code, inst->Alpha.DestIndex);
211 use_temporary(code, inst->RGB.DestIndex);
212
213 if (inst->RGB.Saturate)
214 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
215 if (inst->Alpha.Saturate)
216 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
217
218 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
219 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
220 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
221
222 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
223 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
224 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
225
226 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
227 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
228 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
229
230 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
231 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
232 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
233
234 return GL_TRUE;
235 }
236
237 static GLuint translate_strq_swizzle(struct prog_src_register src)
238 {
239 GLuint swiz = 0;
240 int i;
241 for (i = 0; i < 4; i++)
242 swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2;
243 return swiz;
244 }
245
246 /**
247 * Emit a single TEX instruction
248 */
249 static GLboolean emit_tex(void *data, struct prog_instruction *inst)
250 {
251 PROG_CODE;
252
253 if (code->inst_end >= 511) {
254 error("emit_tex: Too many instructions");
255 return GL_FALSE;
256 }
257
258 int ip = ++code->inst_end;
259
260 code->inst[ip].inst0 = R500_INST_TYPE_TEX
261 | (inst->DstReg.WriteMask << 11)
262 | R500_INST_TEX_SEM_WAIT;
263 code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
264 | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
265
266 if (inst->TexSrcTarget == TEXTURE_RECT_INDEX)
267 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
268
269 switch (inst->Opcode) {
270 case OPCODE_KIL:
271 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
272 break;
273 case OPCODE_TEX:
274 code->inst[ip].inst1 |= R500_TEX_INST_LD;
275 break;
276 case OPCODE_TXB:
277 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
278 break;
279 case OPCODE_TXP:
280 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
281 break;
282 default:
283 error("emit_tex can't handle opcode %x\n", inst->Opcode);
284 }
285
286 code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
287 | (translate_strq_swizzle(inst->SrcReg[0]) << 8)
288 | R500_TEX_DST_ADDR(inst->DstReg.Index)
289 | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
290 | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
291
292 return GL_TRUE;
293 }
294
295 static const struct radeon_pair_handler pair_handler = {
296 .EmitConst = emit_const,
297 .EmitPaired = emit_paired,
298 .EmitTex = emit_tex,
299 .MaxHwTemps = 128
300 };
301
302 GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
303 {
304 struct r500_fragment_program_code *code = &compiler->code->r500;
305
306 _mesa_bzero(code, sizeof(*code));
307 code->max_temp_idx = 1;
308 code->inst_offset = 0;
309 code->inst_end = -1;
310
311 if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
312 return GL_FALSE;
313
314 if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
315 /* This may happen when dead-code elimination is disabled or
316 * when most of the fragment program logic is leading to a KIL */
317 if (code->inst_end >= 511) {
318 error("Introducing fake OUT: Too many instructions");
319 return GL_FALSE;
320 }
321
322 int ip = ++code->inst_end;
323 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
324 }
325
326 return GL_TRUE;
327 }