Merge branch '7.8'
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r300_fragprog_emit.c
1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * Emit the r300_fragment_program_code that can be understood by the hardware.
32 * Input is a pre-transformed radeon_program.
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \todo FogOption
39 */
40
41 #include "r300_fragprog.h"
42
43 #include "../r300_reg.h"
44
45 #include "radeon_program_pair.h"
46 #include "r300_fragprog_swizzle.h"
47
48
/**
 * Bookkeeping carried along while the hardware code for one fragment
 * program is being emitted.
 */
struct r300_emit_state {
	/* Compiler whose program is being translated. */
	struct r300_fragment_program_compiler * compiler;

	/* Index of the node currently being filled (2 bits: 0..3). */
	unsigned current_node : 2;
	/* tex.length at the moment the current node was started. */
	unsigned node_first_tex : 8;
	/* alu.length at the moment the current node was started. */
	unsigned node_first_alu : 8;
	/* Flag bits OR'ed into the current node's code_addr word. */
	uint32_t node_flags;
};
57
/*
 * Declares two locals from a variable named `emit` that must be in scope:
 *   c    - the fragment program compiler held by the emit state
 *   code - the R300 hardware code structure owned by that compiler
 * Use as a declaration statement: `PROG_CODE;`
 */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = emit->compiler; \
	struct r300_fragment_program_code *code = &emit->compiler->code->code.r300
61
/*
 * Report a compile error through rc_error(), prefixing the message with
 * the current file and function name and appending a newline.
 * Requires a variable named `c` (the compiler) in the calling scope.
 */
#define error(fmt, ...) do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while(0)
66
67
68 /**
69 * Mark a temporary register as used.
70 */
71 static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
72 {
73 if (index > code->pixsize)
74 code->pixsize = index;
75 }
76
77 static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src)
78 {
79 if (src.File == RC_FILE_CONSTANT) {
80 return src.Index | (1 << 5);
81 } else if (src.File == RC_FILE_TEMPORARY) {
82 use_temporary(code, src.Index);
83 return src.Index;
84 }
85
86 return 0;
87 }
88
89
90 static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
91 {
92 switch(opcode) {
93 case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
94 case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
95 case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
96 case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
97 default:
98 error("translate_rgb_opcode(%i): Unknown opcode", opcode);
99 /* fall through */
100 case RC_OPCODE_NOP:
101 /* fall through */
102 case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
103 case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
104 case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
105 case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
106 }
107 }
108
109 static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
110 {
111 switch(opcode) {
112 case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
113 case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
114 case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
115 case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
116 case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
117 case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
118 default:
119 error("translate_rgb_opcode(%i): Unknown opcode", opcode);
120 /* fall through */
121 case RC_OPCODE_NOP:
122 /* fall through */
123 case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
124 case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
125 case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
126 case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
127 case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
128 }
129 }
130
131 /**
132 * Emit one paired ALU instruction.
133 */
134 static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
135 {
136 PROG_CODE;
137
138 if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
139 error("Too many ALU instructions");
140 return 0;
141 }
142
143 int ip = code->alu.length++;
144 int j;
145
146 code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
147 code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
148
149 for(j = 0; j < 3; ++j) {
150 unsigned int src = use_source(code, inst->RGB.Src[j]);
151 code->alu.inst[ip].rgb_addr |= src << (6*j);
152
153 src = use_source(code, inst->Alpha.Src[j]);
154 code->alu.inst[ip].alpha_addr |= src << (6*j);
155
156 unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
157 arg |= inst->RGB.Arg[j].Abs << 6;
158 arg |= inst->RGB.Arg[j].Negate << 5;
159 code->alu.inst[ip].rgb_inst |= arg << (7*j);
160
161 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
162 arg |= inst->Alpha.Arg[j].Abs << 6;
163 arg |= inst->Alpha.Arg[j].Negate << 5;
164 code->alu.inst[ip].alpha_inst |= arg << (7*j);
165 }
166
167 if (inst->RGB.Saturate)
168 code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
169 if (inst->Alpha.Saturate)
170 code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
171
172 if (inst->RGB.WriteMask) {
173 use_temporary(code, inst->RGB.DestIndex);
174 code->alu.inst[ip].rgb_addr |=
175 (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
176 (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
177 }
178 if (inst->RGB.OutputWriteMask) {
179 code->alu.inst[ip].rgb_addr |=
180 (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
181 R300_RGB_TARGET(inst->RGB.Target);
182 emit->node_flags |= R300_RGBA_OUT;
183 }
184
185 if (inst->Alpha.WriteMask) {
186 use_temporary(code, inst->Alpha.DestIndex);
187 code->alu.inst[ip].alpha_addr |=
188 (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
189 R300_ALU_DSTA_REG;
190 }
191 if (inst->Alpha.OutputWriteMask) {
192 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
193 R300_ALPHA_TARGET(inst->Alpha.Target);
194 emit->node_flags |= R300_RGBA_OUT;
195 }
196 if (inst->Alpha.DepthWriteMask) {
197 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
198 emit->node_flags |= R300_W_OUT;
199 c->code->writes_depth = 1;
200 }
201
202 return 1;
203 }
204
205
206 /**
207 * Finish the current node without advancing to the next one.
208 */
209 static int finish_node(struct r300_emit_state * emit)
210 {
211 struct r300_fragment_program_compiler * c = emit->compiler;
212 struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
213
214 if (code->alu.length == emit->node_first_alu) {
215 /* Generate a single NOP for this node */
216 struct rc_pair_instruction inst;
217 memset(&inst, 0, sizeof(inst));
218 if (!emit_alu(emit, &inst))
219 return 0;
220 }
221
222 unsigned alu_offset = emit->node_first_alu;
223 unsigned alu_end = code->alu.length - alu_offset - 1;
224 unsigned tex_offset = emit->node_first_tex;
225 unsigned tex_end = code->tex.length - tex_offset - 1;
226
227 if (code->tex.length == emit->node_first_tex) {
228 if (emit->current_node > 0) {
229 error("Node %i has no TEX instructions", emit->current_node);
230 return 0;
231 }
232
233 tex_end = 0;
234 } else {
235 if (emit->current_node == 0)
236 code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
237 }
238
239 /* Write the config register.
240 * Note: The order in which the words for each node are written
241 * is not correct here and needs to be fixed up once we're entirely
242 * done
243 *
244 * Also note that the register specification from AMD is slightly
245 * incorrect in its description of this register. */
246 code->code_addr[emit->current_node] =
247 (alu_offset << R300_ALU_START_SHIFT) |
248 (alu_end << R300_ALU_SIZE_SHIFT) |
249 (tex_offset << R300_TEX_START_SHIFT) |
250 (tex_end << R300_TEX_SIZE_SHIFT) |
251 emit->node_flags;
252
253 return 1;
254 }
255
256
257 /**
258 * Begin a block of texture instructions.
259 * Create the necessary indirection.
260 */
261 static int begin_tex(struct r300_emit_state * emit)
262 {
263 PROG_CODE;
264
265 if (code->alu.length == emit->node_first_alu &&
266 code->tex.length == emit->node_first_tex) {
267 return 1;
268 }
269
270 if (emit->current_node == 3) {
271 error("Too many texture indirections");
272 return 0;
273 }
274
275 if (!finish_node(emit))
276 return 0;
277
278 emit->current_node++;
279 emit->node_first_tex = code->tex.length;
280 emit->node_first_alu = code->alu.length;
281 emit->node_flags = 0;
282 return 1;
283 }
284
285
286 static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
287 {
288 PROG_CODE;
289
290 if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
291 error("Too many TEX instructions");
292 return 0;
293 }
294
295 unsigned int unit = inst->U.I.TexSrcUnit;
296 unsigned int dest = inst->U.I.DstReg.Index;
297 unsigned int opcode;
298
299 switch(inst->U.I.Opcode) {
300 case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
301 case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
302 case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
303 case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
304 default:
305 error("Unknown texture opcode %i", inst->U.I.Opcode);
306 return 0;
307 }
308
309 if (inst->U.I.Opcode == RC_OPCODE_KIL) {
310 unit = 0;
311 dest = 0;
312 } else {
313 use_temporary(code, dest);
314 }
315
316 use_temporary(code, inst->U.I.SrcReg[0].Index);
317
318 code->tex.inst[code->tex.length++] =
319 (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
320 (dest << R300_DST_ADDR_SHIFT) |
321 (unit << R300_TEX_ID_SHIFT) |
322 (opcode << R300_TEX_INST_SHIFT);
323 return 1;
324 }
325
326
327 /**
328 * Final compilation step: Turn the intermediate radeon_program into
329 * machine-readable instructions.
330 */
331 void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
332 {
333 struct r300_emit_state emit;
334 struct r300_fragment_program_code *code = &compiler->code->code.r300;
335
336 memset(&emit, 0, sizeof(emit));
337 emit.compiler = compiler;
338
339 memset(code, 0, sizeof(struct r300_fragment_program_code));
340
341 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
342 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
343 inst = inst->Next) {
344 if (inst->Type == RC_INSTRUCTION_NORMAL) {
345 if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
346 begin_tex(&emit);
347 continue;
348 }
349
350 emit_tex(&emit, inst);
351 } else {
352 emit_alu(&emit, &inst->U.P);
353 }
354 }
355
356 if (code->pixsize >= compiler->max_temp_regs)
357 rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
358
359 if (compiler->Base.Error)
360 return;
361
362 /* Finish the program */
363 finish_node(&emit);
364
365 code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
366 code->code_offset =
367 (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
368 ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
369 (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
370 ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
371
372 if (emit.current_node < 3) {
373 int shift = 3 - emit.current_node;
374 int i;
375 for(i = emit.current_node; i >= 0; --i)
376 code->code_addr[shift + i] = code->code_addr[i];
377 for(i = 0; i < shift; ++i)
378 code->code_addr[i] = 0;
379 }
380 }