r300: Allow compiler to add constants in a cleaner way
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r300_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * Emit the r300_fragment_program_code that can be understood by the hardware.
32 * Input is a pre-transformed radeon_program.
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \todo FogOption
39 */
40
41 #include "r300_fragprog.h"
42
43 #include "../r300_reg.h"
44
45 #include "radeon_program_pair.h"
46 #include "r300_fragprog_swizzle.h"
47
48
/**
 * Declare the two locals used by the emit callbacks:
 *   c    - the compiler, recovered from the opaque `data` argument;
 *   code - the r300 hardware code block that belongs to that compiler.
 *
 * Must be used where a variable named `data` is in scope. The trailing
 * semicolon is supplied by the caller.
 */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = \
		(struct r300_fragment_program_compiler*)data; \
	struct r300_fragment_program_code *code = \
		&c->code->code.r300
52
/**
 * Report a compile error through rc_error() on c->Base.
 *
 * The message is prefixed with the current file and function name and
 * terminated with a newline. A variable named `c` must be in scope at the
 * call site.
 */
#define error(fmt, ...) do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while(0)
57
58
59 /**
60 * Mark a temporary register as used.
61 */
62 static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
63 {
64 if (index > code->max_temp_idx)
65 code->max_temp_idx = index;
66 }
67
68
69 static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
70 {
71 switch(opcode) {
72 case OPCODE_CMP: return R300_ALU_OUTC_CMP;
73 case OPCODE_DP3: return R300_ALU_OUTC_DP3;
74 case OPCODE_DP4: return R300_ALU_OUTC_DP4;
75 case OPCODE_FRC: return R300_ALU_OUTC_FRC;
76 default:
77 error("translate_rgb_opcode(%i): Unknown opcode", opcode);
78 /* fall through */
79 case OPCODE_NOP:
80 /* fall through */
81 case OPCODE_MAD: return R300_ALU_OUTC_MAD;
82 case OPCODE_MAX: return R300_ALU_OUTC_MAX;
83 case OPCODE_MIN: return R300_ALU_OUTC_MIN;
84 case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
85 }
86 }
87
88 static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
89 {
90 switch(opcode) {
91 case OPCODE_CMP: return R300_ALU_OUTA_CMP;
92 case OPCODE_DP3: return R300_ALU_OUTA_DP4;
93 case OPCODE_DP4: return R300_ALU_OUTA_DP4;
94 case OPCODE_EX2: return R300_ALU_OUTA_EX2;
95 case OPCODE_FRC: return R300_ALU_OUTA_FRC;
96 case OPCODE_LG2: return R300_ALU_OUTA_LG2;
97 default:
98 error("translate_rgb_opcode(%i): Unknown opcode", opcode);
99 /* fall through */
100 case OPCODE_NOP:
101 /* fall through */
102 case OPCODE_MAD: return R300_ALU_OUTA_MAD;
103 case OPCODE_MAX: return R300_ALU_OUTA_MAX;
104 case OPCODE_MIN: return R300_ALU_OUTA_MIN;
105 case OPCODE_RCP: return R300_ALU_OUTA_RCP;
106 case OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
107 }
108 }
109
110 /**
111 * Emit one paired ALU instruction.
112 */
113 static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
114 {
115 PROG_CODE;
116
117 if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
118 error("Too many ALU instructions");
119 return GL_FALSE;
120 }
121
122 int ip = code->alu.length++;
123 int j;
124 code->node[code->cur_node].alu_end++;
125
126 code->alu.inst[ip].inst0 = translate_rgb_opcode(c, inst->RGB.Opcode);
127 code->alu.inst[ip].inst2 = translate_alpha_opcode(c, inst->Alpha.Opcode);
128
129 for(j = 0; j < 3; ++j) {
130 GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
131 if (!inst->RGB.Src[j].Constant)
132 use_temporary(code, inst->RGB.Src[j].Index);
133 code->alu.inst[ip].inst1 |= src << (6*j);
134
135 src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
136 if (!inst->Alpha.Src[j].Constant)
137 use_temporary(code, inst->Alpha.Src[j].Index);
138 code->alu.inst[ip].inst3 |= src << (6*j);
139
140 GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
141 arg |= inst->RGB.Arg[j].Abs << 6;
142 arg |= inst->RGB.Arg[j].Negate << 5;
143 code->alu.inst[ip].inst0 |= arg << (7*j);
144
145 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
146 arg |= inst->Alpha.Arg[j].Abs << 6;
147 arg |= inst->Alpha.Arg[j].Negate << 5;
148 code->alu.inst[ip].inst2 |= arg << (7*j);
149 }
150
151 if (inst->RGB.Saturate)
152 code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP;
153 if (inst->Alpha.Saturate)
154 code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP;
155
156 if (inst->RGB.WriteMask) {
157 use_temporary(code, inst->RGB.DestIndex);
158 code->alu.inst[ip].inst1 |=
159 (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
160 (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
161 }
162 if (inst->RGB.OutputWriteMask) {
163 code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
164 code->node[code->cur_node].flags |= R300_RGBA_OUT;
165 }
166
167 if (inst->Alpha.WriteMask) {
168 use_temporary(code, inst->Alpha.DestIndex);
169 code->alu.inst[ip].inst3 |=
170 (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
171 R300_ALU_DSTA_REG;
172 }
173 if (inst->Alpha.OutputWriteMask) {
174 code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT;
175 code->node[code->cur_node].flags |= R300_RGBA_OUT;
176 }
177 if (inst->Alpha.DepthWriteMask) {
178 code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
179 code->node[code->cur_node].flags |= R300_W_OUT;
180 c->code->writes_depth = GL_TRUE;
181 }
182
183 return GL_TRUE;
184 }
185
186
187 /**
188 * Finish the current node without advancing to the next one.
189 */
190 static GLboolean finish_node(struct r300_fragment_program_compiler *c)
191 {
192 struct r300_fragment_program_code *code = &c->code->code.r300;
193 struct r300_fragment_program_node *node = &code->node[code->cur_node];
194
195 if (node->alu_end < 0) {
196 /* Generate a single NOP for this node */
197 struct radeon_pair_instruction inst;
198 _mesa_bzero(&inst, sizeof(inst));
199 if (!emit_alu(c, &inst))
200 return GL_FALSE;
201 }
202
203 if (node->tex_end < 0) {
204 if (code->cur_node == 0) {
205 node->tex_end = 0;
206 } else {
207 error("Node %i has no TEX instructions", code->cur_node);
208 return GL_FALSE;
209 }
210 } else {
211 if (code->cur_node == 0)
212 code->first_node_has_tex = 1;
213 }
214
215 return GL_TRUE;
216 }
217
218
219 /**
220 * Begin a block of texture instructions.
221 * Create the necessary indirection.
222 */
223 static GLboolean begin_tex(void* data)
224 {
225 PROG_CODE;
226
227 if (code->cur_node == 0) {
228 if (code->node[0].alu_end < 0 &&
229 code->node[0].tex_end < 0)
230 return GL_TRUE;
231 }
232
233 if (code->cur_node == 3) {
234 error("Too many texture indirections");
235 return GL_FALSE;
236 }
237
238 if (!finish_node(c))
239 return GL_FALSE;
240
241 struct r300_fragment_program_node *node = &code->node[++code->cur_node];
242 node->alu_offset = code->alu.length;
243 node->alu_end = -1;
244 node->tex_offset = code->tex.length;
245 node->tex_end = -1;
246 return GL_TRUE;
247 }
248
249
250 static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst)
251 {
252 PROG_CODE;
253
254 if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
255 error("Too many TEX instructions");
256 return GL_FALSE;
257 }
258
259 GLuint unit = inst->TexSrcUnit;
260 GLuint dest = inst->DestIndex;
261 GLuint opcode;
262
263 switch(inst->Opcode) {
264 case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
265 case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
266 case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
267 case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
268 default:
269 error("Unknown texture opcode %i", inst->Opcode);
270 return GL_FALSE;
271 }
272
273 if (inst->Opcode == RADEON_OPCODE_KIL) {
274 unit = 0;
275 dest = 0;
276 } else {
277 use_temporary(code, dest);
278 }
279
280 use_temporary(code, inst->SrcIndex);
281
282 code->node[code->cur_node].tex_end++;
283 code->tex.inst[code->tex.length++] =
284 (inst->SrcIndex << R300_SRC_ADDR_SHIFT) |
285 (dest << R300_DST_ADDR_SHIFT) |
286 (unit << R300_TEX_ID_SHIFT) |
287 (opcode << R300_TEX_INST_SHIFT);
288 return GL_TRUE;
289 }
290
291
292 static const struct radeon_pair_handler pair_handler = {
293 .EmitPaired = &emit_alu,
294 .EmitTex = &emit_tex,
295 .BeginTexBlock = &begin_tex,
296 .MaxHwTemps = R300_PFS_NUM_TEMP_REGS
297 };
298
299 /**
300 * Final compilation step: Turn the intermediate radeon_program into
301 * machine-readable instructions.
302 */
303 void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
304 {
305 struct r300_fragment_program_code *code = &compiler->code->code.r300;
306
307 _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
308 code->node[0].alu_end = -1;
309 code->node[0].tex_end = -1;
310
311 radeonPairProgram(&compiler->Base, &pair_handler, compiler);
312 if (compiler->Base.Error)
313 return;
314
315 finish_node(compiler);
316 }
317