r300g: add a new debug option which disables compiler optimizations
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "program/program.h"
35 #include "program/programopt.h"
36 #include "program/prog_instruction.h"
37 #include "program/prog_parameter.h"
38 #include "program/prog_print.h"
39 #include "program/prog_statevars.h"
40 #include "tnl/tnl.h"
41
42 #include "compiler/radeon_compiler.h"
43 #include "radeon_mesa_to_rc.h"
44 #include "r300_context.h"
45 #include "r300_fragprog_common.h"
46 #include "r300_state.h"
47
48 /**
49 * Write parameter array for the given vertex program into dst.
50 * Return the total number of components written.
51 */
52 static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
53 {
54 int i;
55
56 if (vp->Base->IsNVProgram) {
57 _mesa_load_tracked_matrices(ctx);
58 } else {
59 if (vp->Base->Base.Parameters) {
60 _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
61 }
62 }
63
64 for(i = 0; i < vp->code.constants.Count; ++i) {
65 const float * src = 0;
66 const struct rc_constant * constant = &vp->code.constants.Constants[i];
67
68 switch(constant->Type) {
69 case RC_CONSTANT_EXTERNAL:
70 if (vp->Base->IsNVProgram) {
71 src = ctx->VertexProgram.Parameters[constant->u.External];
72 } else {
73 src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
74 }
75 break;
76
77 case RC_CONSTANT_IMMEDIATE:
78 src = constant->u.Immediate;
79 break;
80 }
81
82 assert(src);
83 dst[4*i] = src[0];
84 dst[4*i + 1] = src[1];
85 dst[4*i + 2] = src[2];
86 dst[4*i + 3] = src[3];
87 }
88
89 return 4 * vp->code.constants.Count;
90 }
91
92 static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
93 {
94 GLbitfield outputs = 0;
95 int i;
96
97 #define ADD_OUTPUT(fp_attr, vp_result) \
98 do { \
99 if (fpreads & (1 << (fp_attr))) \
100 outputs |= (1 << (vp_result)); \
101 } while (0)
102
103 ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
104 ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
105
106 for (i = 0; i <= 7; ++i) {
107 ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
108 }
109
110 #undef ADD_OUTPUT
111
112 if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
113 (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
114 outputs |= 1 << VERT_RESULT_BFC0;
115 if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
116 (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
117 outputs |= 1 << VERT_RESULT_BFC1;
118
119 outputs |= 1 << VERT_RESULT_HPOS;
120 if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
121 outputs |= 1 << VERT_RESULT_PSIZ;
122
123 return outputs;
124 }
125
126
127 static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
128 {
129 int i;
130 int cur_reg;
131 GLuint OutputsWritten, InputsRead;
132
133 OutputsWritten = c->Base.Program.OutputsWritten;
134 InputsRead = c->Base.Program.InputsRead;
135
136 cur_reg = -1;
137 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
138 if (InputsRead & (1 << i))
139 c->code->inputs[i] = ++cur_reg;
140 else
141 c->code->inputs[i] = -1;
142 }
143
144 cur_reg = 0;
145 for (i = 0; i < VERT_RESULT_MAX; i++)
146 c->code->outputs[i] = -1;
147
148 assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
149
150 if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
151 c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
152 }
153
154 if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
155 c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
156 }
157
158 /* If we're writing back facing colors we need to send
159 * four colors to make front/back face colors selection work.
160 * If the vertex program doesn't write all 4 colors, lets
161 * pretend it does by skipping output index reg so the colors
162 * get written into appropriate output vectors.
163 */
164 if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
165 c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
166 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
167 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
168 cur_reg++;
169 }
170
171 if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
172 c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
173 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
174 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
175 cur_reg++;
176 }
177
178 if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
179 c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
180 } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
181 cur_reg++;
182 }
183
184 if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
185 c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
186 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
187 cur_reg++;
188 }
189
190 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
191 if (OutputsWritten & (1 << i)) {
192 c->code->outputs[i] = cur_reg++;
193 }
194 }
195
196 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
197 c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
198 }
199 }
200
201 /**
202 * The NV_vertex_program spec mandates that all registers be
203 * initialized to zero. We do this here unconditionally.
204 *
205 * \note We rely on dead-code elimination in the compiler.
206 */
207 static void initialize_NV_registers(struct radeon_compiler * compiler)
208 {
209 unsigned int reg;
210 struct rc_instruction * inst;
211
212 for(reg = 0; reg < 12; ++reg) {
213 inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
214 inst->U.I.Opcode = RC_OPCODE_MOV;
215 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
216 inst->U.I.DstReg.Index = reg;
217 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
218 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
219 }
220
221 inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
222 inst->U.I.Opcode = RC_OPCODE_ARL;
223 inst->U.I.DstReg.File = RC_FILE_ADDRESS;
224 inst->U.I.DstReg.Index = 0;
225 inst->U.I.DstReg.WriteMask = WRITEMASK_X;
226 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
227 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
228 }
229
230 static struct r300_vertex_program *build_program(GLcontext *ctx,
231 struct r300_vertex_program_key *wanted_key,
232 const struct gl_vertex_program *mesa_vp)
233 {
234 struct r300_vertex_program *vp;
235 struct r300_vertex_program_compiler compiler;
236
237 vp = calloc(1, sizeof(*vp));
238 vp->Base = _mesa_clone_vertex_program(ctx, mesa_vp);
239 memcpy(&vp->key, wanted_key, sizeof(vp->key));
240
241 rc_init(&compiler.Base);
242 compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;
243
244 compiler.code = &vp->code;
245 compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
246 compiler.SetHwInputOutput = &t_inputs_outputs;
247 compiler.Base.is_r500 = R300_CONTEXT(ctx)->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
248 compiler.Base.disable_optimizations = 0;
249 compiler.Base.has_half_swizzles = 0;
250 compiler.Base.max_temp_regs = 32;
251 compiler.Base.max_constants = 256;
252 compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 1024 : 256;
253
254 if (compiler.Base.Debug) {
255 fprintf(stderr, "Initial vertex program:\n");
256 _mesa_print_program(&vp->Base->Base);
257 fflush(stderr);
258 }
259
260 if (mesa_vp->IsPositionInvariant) {
261 _mesa_insert_mvp_code(ctx, vp->Base);
262 }
263
264 radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
265
266 if (mesa_vp->IsNVProgram)
267 initialize_NV_registers(&compiler.Base);
268
269 rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
270
271 if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
272 unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
273
274 /* Set empty writemask for instructions writing to vp_wpos_attr
275 * before moving the wpos attr there.
276 * Such instructions will be removed by DCE.
277 */
278 rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0);
279 rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr);
280 }
281
282 if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
283 unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
284
285 /* Set empty writemask for instructions writing to vp_fog_attr
286 * before moving the fog attr there.
287 * Such instructions will be removed by DCE.
288 */
289 rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0);
290 rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X);
291 }
292
293 r3xx_compile_vertex_program(&compiler);
294
295 if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) {
296 rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n");
297 }
298
299 vp->error = compiler.Base.Error;
300
301 vp->Base->Base.InputsRead = vp->code.InputsRead;
302 vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
303
304 rc_destroy(&compiler.Base);
305
306 return vp;
307 }
308
309 struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
310 {
311 r300ContextPtr r300 = R300_CONTEXT(ctx);
312 struct r300_vertex_program_key wanted_key = { 0 };
313 struct r300_vertex_program_cont *vpc;
314 struct r300_vertex_program *vp;
315
316 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
317
318 if (!r300->selected_fp) {
319 /* This can happen when GetProgramiv is called to check
320 * whether the program runs natively.
321 *
322 * To be honest, this is not a very good solution,
323 * but solving the problem of reporting good values
324 * for those queries is tough anyway considering that
325 * we recompile vertex programs based on the precise
326 * fragment program that is in use.
327 */
328 r300SelectAndTranslateFragmentShader(ctx);
329 }
330
331 assert(r300->selected_fp);
332 wanted_key.FpReads = r300->selected_fp->InputsRead;
333 wanted_key.FogAttr = r300->selected_fp->fog_attr;
334 wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
335
336 for (vp = vpc->progs; vp; vp = vp->next) {
337 if (memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) {
338 return r300->selected_vp = vp;
339 }
340 }
341
342 vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
343 vp->next = vpc->progs;
344 vpc->progs = vp;
345
346 return r300->selected_vp = vp;
347 }
348
/* Raise the vpu.count field of the command header at ptr to new_count / 4
 * if that is larger than the current value; the count is never lowered. */
#define bump_vpu_count(ptr, new_count) do { \
		drm_r300_cmd_header_t *_hdr = (drm_r300_cmd_header_t *)(ptr); \
		int _vec_count = (new_count) / 4; \
		if (_vec_count > _hdr->vpu.count) \
			_hdr->vpu.count = _vec_count; \
	} while (0)
354
355 static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
356 {
357 int i;
358
359 assert((code->length > 0) && (code->length % 4 == 0));
360
361 switch ((dest >> 8) & 0xf) {
362 case 0:
363 R300_STATECHANGE(r300, vpi);
364 for (i = 0; i < code->length; i++)
365 r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
366 bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
367 break;
368 case 2:
369 R300_STATECHANGE(r300, vpp);
370 for (i = 0; i < code->length; i++)
371 r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
372 bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
373 break;
374 case 4:
375 R300_STATECHANGE(r300, vps);
376 for (i = 0; i < code->length; i++)
377 r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
378 bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
379 break;
380 default:
381 fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
382 exit(-1);
383 }
384 }
385
386 void r300SetupVertexProgram(r300ContextPtr rmesa)
387 {
388 GLcontext *ctx = rmesa->radeon.glCtx;
389 struct r300_vertex_program *prog = rmesa->selected_vp;
390 int inst_count = 0;
391 int param_count = 0;
392
393 /* Reset state, in case we don't use something */
394 ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
395 ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
396 ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
397
398 R300_STATECHANGE(rmesa, vap_cntl);
399 R300_STATECHANGE(rmesa, vpp);
400 param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
401 if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) {
402 WARN_ONCE("Too many VP params, expect rendering errors\n");
403 }
404 /* Prevent the overflow (vpu.count is u8) */
405 bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count));
406 param_count /= 4;
407
408 r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
409 inst_count = (prog->code.length / 4) - 1;
410
411 r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
412 _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
413
414 R300_STATECHANGE(rmesa, pvs);
415 rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
416 (inst_count << R300_PVS_LAST_INST_SHIFT);
417
418 rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT);
419 rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
420 }