Merge remote branch 'origin/master' into radeon-rewrite
[mesa.git] / src / mesa / drivers / dri / r300 / r300_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * Emit the r300_fragment_program_code that can be understood by the hardware.
32 * Input is a pre-transformed radeon_program.
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \todo FogOption
39 */
40
41 #include "r300_fragprog.h"
42
43 #include "radeon_program_pair.h"
44 #include "r300_fragprog_swizzle.h"
45 #include "r300_reg.h"
46
47
/**
 * Declare the two locals used by the pair-handler callbacks in this file:
 * \c c, the compiler state recovered from the opaque \c data pointer, and
 * \c code, the r300 code structure being filled in.
 *
 * Requires a variable named \c data to be in scope. The expansion has no
 * trailing semicolon; callers write <tt>PROG_CODE;</tt>.
 */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
	struct r300_fragment_program_code *code = &c->code->r300

/**
 * Print a printf-style diagnostic to stderr, prefixed with the current
 * file and function name. A newline is appended automatically, so the
 * format string should not end with one.
 */
#define error(fmt, ...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while(0)
56
57
58 static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex)
59 {
60 PROG_CODE;
61
62 for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
63 if (code->constant[*hwindex].File == file &&
64 code->constant[*hwindex].Index == index)
65 break;
66 }
67
68 if (*hwindex >= code->const_nr) {
69 if (*hwindex >= PFS_NUM_CONST_REGS) {
70 error("Out of hw constants!\n");
71 return GL_FALSE;
72 }
73
74 code->const_nr++;
75 code->constant[*hwindex].File = file;
76 code->constant[*hwindex].Index = index;
77 }
78
79 return GL_TRUE;
80 }
81
82
83 /**
84 * Mark a temporary register as used.
85 */
86 static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
87 {
88 if (index > code->max_temp_idx)
89 code->max_temp_idx = index;
90 }
91
92
93 static GLuint translate_rgb_opcode(GLuint opcode)
94 {
95 switch(opcode) {
96 case OPCODE_CMP: return R300_ALU_OUTC_CMP;
97 case OPCODE_DP3: return R300_ALU_OUTC_DP3;
98 case OPCODE_DP4: return R300_ALU_OUTC_DP4;
99 case OPCODE_FRC: return R300_ALU_OUTC_FRC;
100 default:
101 error("translate_rgb_opcode(%i): Unknown opcode", opcode);
102 /* fall through */
103 case OPCODE_NOP:
104 /* fall through */
105 case OPCODE_MAD: return R300_ALU_OUTC_MAD;
106 case OPCODE_MAX: return R300_ALU_OUTC_MAX;
107 case OPCODE_MIN: return R300_ALU_OUTC_MIN;
108 case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
109 }
110 }
111
112 static GLuint translate_alpha_opcode(GLuint opcode)
113 {
114 switch(opcode) {
115 case OPCODE_CMP: return R300_ALU_OUTA_CMP;
116 case OPCODE_DP3: return R300_ALU_OUTA_DP4;
117 case OPCODE_DP4: return R300_ALU_OUTA_DP4;
118 case OPCODE_EX2: return R300_ALU_OUTA_EX2;
119 case OPCODE_FRC: return R300_ALU_OUTA_FRC;
120 case OPCODE_LG2: return R300_ALU_OUTA_LG2;
121 default:
122 error("translate_rgb_opcode(%i): Unknown opcode", opcode);
123 /* fall through */
124 case OPCODE_NOP:
125 /* fall through */
126 case OPCODE_MAD: return R300_ALU_OUTA_MAD;
127 case OPCODE_MAX: return R300_ALU_OUTA_MAX;
128 case OPCODE_MIN: return R300_ALU_OUTA_MIN;
129 case OPCODE_RCP: return R300_ALU_OUTA_RCP;
130 case OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
131 }
132 }
133
134 /**
135 * Emit one paired ALU instruction.
136 */
137 static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
138 {
139 PROG_CODE;
140
141 if (code->alu.length >= PFS_MAX_ALU_INST) {
142 error("Too many ALU instructions");
143 return GL_FALSE;
144 }
145
146 int ip = code->alu.length++;
147 int j;
148 code->node[code->cur_node].alu_end++;
149
150 code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode);
151 code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode);
152
153 for(j = 0; j < 3; ++j) {
154 GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
155 if (!inst->RGB.Src[j].Constant)
156 use_temporary(code, inst->RGB.Src[j].Index);
157 code->alu.inst[ip].inst1 |= src << (6*j);
158
159 src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
160 if (!inst->Alpha.Src[j].Constant)
161 use_temporary(code, inst->Alpha.Src[j].Index);
162 code->alu.inst[ip].inst3 |= src << (6*j);
163
164 GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
165 arg |= inst->RGB.Arg[j].Abs << 6;
166 arg |= inst->RGB.Arg[j].Negate << 5;
167 code->alu.inst[ip].inst0 |= arg << (7*j);
168
169 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
170 arg |= inst->Alpha.Arg[j].Abs << 6;
171 arg |= inst->Alpha.Arg[j].Negate << 5;
172 code->alu.inst[ip].inst2 |= arg << (7*j);
173 }
174
175 if (inst->RGB.Saturate)
176 code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP;
177 if (inst->Alpha.Saturate)
178 code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP;
179
180 if (inst->RGB.WriteMask) {
181 use_temporary(code, inst->RGB.DestIndex);
182 code->alu.inst[ip].inst1 |=
183 (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
184 (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
185 }
186 if (inst->RGB.OutputWriteMask) {
187 code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
188 code->node[code->cur_node].flags |= R300_RGBA_OUT;
189 }
190
191 if (inst->Alpha.WriteMask) {
192 use_temporary(code, inst->Alpha.DestIndex);
193 code->alu.inst[ip].inst3 |=
194 (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
195 R300_ALU_DSTA_REG;
196 }
197 if (inst->Alpha.OutputWriteMask) {
198 code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT;
199 code->node[code->cur_node].flags |= R300_RGBA_OUT;
200 }
201 if (inst->Alpha.DepthWriteMask) {
202 code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
203 code->node[code->cur_node].flags |= R300_W_OUT;
204 c->fp->writes_depth = GL_TRUE;
205 }
206
207 return GL_TRUE;
208 }
209
210
211 /**
212 * Finish the current node without advancing to the next one.
213 */
214 static GLboolean finish_node(struct r300_fragment_program_compiler *c)
215 {
216 struct r300_fragment_program_code *code = &c->code->r300;
217 struct r300_fragment_program_node *node = &code->node[code->cur_node];
218
219 if (node->alu_end < 0) {
220 /* Generate a single NOP for this node */
221 struct radeon_pair_instruction inst;
222 _mesa_bzero(&inst, sizeof(inst));
223 if (!emit_alu(c, &inst))
224 return GL_FALSE;
225 }
226
227 if (node->tex_end < 0) {
228 if (code->cur_node == 0) {
229 node->tex_end = 0;
230 } else {
231 error("Node %i has no TEX instructions", code->cur_node);
232 return GL_FALSE;
233 }
234 } else {
235 if (code->cur_node == 0)
236 code->first_node_has_tex = 1;
237 }
238
239 return GL_TRUE;
240 }
241
242
243 /**
244 * Begin a block of texture instructions.
245 * Create the necessary indirection.
246 */
247 static GLboolean begin_tex(void* data)
248 {
249 PROG_CODE;
250
251 if (code->cur_node == 0) {
252 if (code->node[0].alu_end < 0 &&
253 code->node[0].tex_end < 0)
254 return GL_TRUE;
255 }
256
257 if (code->cur_node == 3) {
258 error("Too many texture indirections");
259 return GL_FALSE;
260 }
261
262 if (!finish_node(c))
263 return GL_FALSE;
264
265 struct r300_fragment_program_node *node = &code->node[++code->cur_node];
266 node->alu_offset = code->alu.length;
267 node->alu_end = -1;
268 node->tex_offset = code->tex.length;
269 node->tex_end = -1;
270 return GL_TRUE;
271 }
272
273
274 static GLboolean emit_tex(void* data, struct prog_instruction* inst)
275 {
276 PROG_CODE;
277
278 if (code->tex.length >= PFS_MAX_TEX_INST) {
279 error("Too many TEX instructions");
280 return GL_FALSE;
281 }
282
283 GLuint unit = inst->TexSrcUnit;
284 GLuint dest = inst->DstReg.Index;
285 GLuint opcode;
286
287 switch(inst->Opcode) {
288 case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
289 case OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
290 case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
291 case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
292 default:
293 error("Unknown texture opcode %i", inst->Opcode);
294 return GL_FALSE;
295 }
296
297 if (inst->Opcode == OPCODE_KIL) {
298 unit = 0;
299 dest = 0;
300 } else {
301 use_temporary(code, dest);
302 }
303
304 use_temporary(code, inst->SrcReg[0].Index);
305
306 code->node[code->cur_node].tex_end++;
307 code->tex.inst[code->tex.length++] =
308 (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
309 (dest << R300_DST_ADDR_SHIFT) |
310 (unit << R300_TEX_ID_SHIFT) |
311 (opcode << R300_TEX_INST_SHIFT);
312 return GL_TRUE;
313 }
314
315
316 static const struct radeon_pair_handler pair_handler = {
317 .EmitConst = &emit_const,
318 .EmitPaired = &emit_alu,
319 .EmitTex = &emit_tex,
320 .BeginTexBlock = &begin_tex,
321 .MaxHwTemps = PFS_NUM_TEMP_REGS
322 };
323
324 /**
325 * Final compilation step: Turn the intermediate radeon_program into
326 * machine-readable instructions.
327 */
328 GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler)
329 {
330 struct r300_fragment_program_code *code = &compiler->code->r300;
331
332 _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
333 code->node[0].alu_end = -1;
334 code->node[0].tex_end = -1;
335
336 if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
337 return GL_FALSE;
338
339 if (!finish_node(compiler))
340 return GL_FALSE;
341
342 return GL_TRUE;
343 }
344