r300/compiler: Refactor to allow different instruction types
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/programopt.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_optimize.h"
38 #include "shader/prog_parameter.h"
39 #include "shader/prog_print.h"
40 #include "shader/prog_statevars.h"
41 #include "tnl/tnl.h"
42
43 #include "compiler/radeon_compiler.h"
44 #include "radeon_mesa_to_rc.h"
45 #include "r300_context.h"
46 #include "r300_fragprog_common.h"
47 #include "r300_state.h"
48
49 /**
50 * Write parameter array for the given vertex program into dst.
51 * Return the total number of components written.
52 */
53 static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
54 {
55 int i;
56
57 if (vp->Base->IsNVProgram) {
58 _mesa_load_tracked_matrices(ctx);
59 } else {
60 if (vp->Base->Base.Parameters) {
61 _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
62 }
63 }
64
65 if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) {
66 /* Should have checked this earlier... */
67 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
68 _mesa_exit(-1);
69 }
70
71 for(i = 0; i < vp->code.constants.Count; ++i) {
72 const float * src = 0;
73 const struct rc_constant * constant = &vp->code.constants.Constants[i];
74
75 switch(constant->Type) {
76 case RC_CONSTANT_EXTERNAL:
77 if (vp->Base->IsNVProgram) {
78 src = ctx->VertexProgram.Parameters[constant->u.External];
79 } else {
80 src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
81 }
82 break;
83
84 case RC_CONSTANT_IMMEDIATE:
85 src = constant->u.Immediate;
86 break;
87 }
88
89 dst[4*i] = src[0];
90 dst[4*i + 1] = src[1];
91 dst[4*i + 2] = src[2];
92 dst[4*i + 3] = src[3];
93 }
94
95 return 4 * vp->code.constants.Count;
96 }
97
98 static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
99 {
100 GLbitfield outputs = 0;
101 int i;
102
103 #define ADD_OUTPUT(fp_attr, vp_result) \
104 do { \
105 if (fpreads & (1 << (fp_attr))) \
106 outputs |= (1 << (vp_result)); \
107 } while (0)
108
109 ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
110 ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
111
112 for (i = 0; i <= 7; ++i) {
113 ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
114 }
115
116 #undef ADD_OUTPUT
117
118 if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
119 (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
120 outputs |= 1 << VERT_RESULT_BFC0;
121 if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
122 (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
123 outputs |= 1 << VERT_RESULT_BFC1;
124
125 outputs |= 1 << VERT_RESULT_HPOS;
126 if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
127 outputs |= 1 << VERT_RESULT_PSIZ;
128
129 return outputs;
130 }
131
132
133 static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
134 {
135 int i;
136 int cur_reg;
137 GLuint OutputsWritten, InputsRead;
138
139 OutputsWritten = c->Base.Program.OutputsWritten;
140 InputsRead = c->Base.Program.InputsRead;
141
142 cur_reg = -1;
143 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
144 if (InputsRead & (1 << i))
145 c->code->inputs[i] = ++cur_reg;
146 else
147 c->code->inputs[i] = -1;
148 }
149
150 cur_reg = 0;
151 for (i = 0; i < VERT_RESULT_MAX; i++)
152 c->code->outputs[i] = -1;
153
154 assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
155
156 if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
157 c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
158 }
159
160 if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
161 c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
162 }
163
164 /* If we're writing back facing colors we need to send
165 * four colors to make front/back face colors selection work.
166 * If the vertex program doesn't write all 4 colors, lets
167 * pretend it does by skipping output index reg so the colors
168 * get written into appropriate output vectors.
169 */
170 if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
171 c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
172 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
173 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
174 cur_reg++;
175 }
176
177 if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
178 c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
179 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
180 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
181 cur_reg++;
182 }
183
184 if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
185 c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
186 } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
187 cur_reg++;
188 }
189
190 if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
191 c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
192 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
193 cur_reg++;
194 }
195
196 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
197 if (OutputsWritten & (1 << i)) {
198 c->code->outputs[i] = cur_reg++;
199 }
200 }
201
202 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
203 c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
204 }
205 }
206
207 /**
208 * The NV_vertex_program spec mandates that all registers be
209 * initialized to zero. We do this here unconditionally.
210 *
211 * \note We rely on dead-code elimination in the compiler.
212 */
213 static void initialize_NV_registers(struct radeon_compiler * compiler)
214 {
215 unsigned int reg;
216 struct rc_instruction * inst;
217
218 for(reg = 0; reg < 12; ++reg) {
219 inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
220 inst->U.I.Opcode = RC_OPCODE_MOV;
221 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
222 inst->U.I.DstReg.Index = reg;
223 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
224 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
225 }
226
227 inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
228 inst->U.I.Opcode = RC_OPCODE_ARL;
229 inst->U.I.DstReg.File = RC_FILE_ADDRESS;
230 inst->U.I.DstReg.Index = 0;
231 inst->U.I.DstReg.WriteMask = WRITEMASK_X;
232 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
233 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
234 }
235
236 static struct r300_vertex_program *build_program(GLcontext *ctx,
237 struct r300_vertex_program_key *wanted_key,
238 const struct gl_vertex_program *mesa_vp)
239 {
240 struct r300_vertex_program *vp;
241 struct r300_vertex_program_compiler compiler;
242
243 vp = _mesa_calloc(sizeof(*vp));
244 vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
245 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
246
247 rc_init(&compiler.Base);
248 compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;
249
250 compiler.code = &vp->code;
251 compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
252 compiler.SetHwInputOutput = &t_inputs_outputs;
253
254 if (compiler.Base.Debug) {
255 fprintf(stderr, "Initial vertex program:\n");
256 _mesa_print_program(&vp->Base->Base);
257 fflush(stderr);
258 }
259
260 if (mesa_vp->IsPositionInvariant) {
261 _mesa_insert_mvp_code(ctx, vp->Base);
262 }
263
264 radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
265
266 if (mesa_vp->IsNVProgram)
267 initialize_NV_registers(&compiler.Base);
268
269 rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
270
271 if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
272 rc_copy_output(&compiler.Base,
273 VERT_RESULT_HPOS,
274 vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
275 }
276
277 if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
278 rc_move_output(&compiler.Base,
279 VERT_RESULT_FOGC,
280 vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
281 }
282
283 r3xx_compile_vertex_program(&compiler);
284 vp->error = compiler.Base.Error;
285
286 vp->Base->Base.InputsRead = vp->code.InputsRead;
287 vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
288
289 rc_destroy(&compiler.Base);
290
291 return vp;
292 }
293
294 struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
295 {
296 r300ContextPtr r300 = R300_CONTEXT(ctx);
297 struct r300_vertex_program_key wanted_key = { 0 };
298 struct r300_vertex_program_cont *vpc;
299 struct r300_vertex_program *vp;
300
301 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
302
303 if (!r300->selected_fp) {
304 /* This can happen when GetProgramiv is called to check
305 * whether the program runs natively.
306 *
307 * To be honest, this is not a very good solution,
308 * but solving the problem of reporting good values
309 * for those queries is tough anyway considering that
310 * we recompile vertex programs based on the precise
311 * fragment program that is in use.
312 */
313 r300SelectAndTranslateFragmentShader(ctx);
314 }
315
316 wanted_key.FpReads = r300->selected_fp->InputsRead;
317 wanted_key.FogAttr = r300->selected_fp->fog_attr;
318 wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
319
320 for (vp = vpc->progs; vp; vp = vp->next) {
321 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
322 == 0) {
323 return r300->selected_vp = vp;
324 }
325 }
326
327 vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
328 vp->next = vpc->progs;
329 vpc->progs = vp;
330
331 return r300->selected_vp = vp;
332 }
333
/**
 * Raise the vpu.count field of the command header at ptr to new_count / 4
 * if it is currently smaller; the count is never lowered.
 * Asserts that new_count / 4 is below 256.
 */
#define bump_vpu_count(ptr, new_count) do { \
		drm_r300_cmd_header_t *bump_hdr_ = (drm_r300_cmd_header_t *)(ptr); \
		int bump_vecs_ = (new_count) / 4; \
		assert(bump_vecs_ < 256); \
		if (bump_hdr_->vpu.count < bump_vecs_) \
			bump_hdr_->vpu.count = bump_vecs_; \
	} while(0)
340
341 static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
342 {
343 int i;
344
345 assert((code->length > 0) && (code->length % 4 == 0));
346
347 R300_STATECHANGE( r300, vap_flush );
348
349 switch ((dest >> 8) & 0xf) {
350 case 0:
351 R300_STATECHANGE(r300, vpi);
352 for (i = 0; i < code->length; i++)
353 r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
354 bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
355 break;
356 case 2:
357 R300_STATECHANGE(r300, vpp);
358 for (i = 0; i < code->length; i++)
359 r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
360 bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
361 break;
362 case 4:
363 R300_STATECHANGE(r300, vps);
364 for (i = 0; i < code->length; i++)
365 r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
366 bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
367 break;
368 default:
369 fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
370 _mesa_exit(-1);
371 }
372 }
373
374 void r300SetupVertexProgram(r300ContextPtr rmesa)
375 {
376 GLcontext *ctx = rmesa->radeon.glCtx;
377 struct r300_vertex_program *prog = rmesa->selected_vp;
378 int inst_count = 0;
379 int param_count = 0;
380
381 /* Reset state, in case we don't use something */
382 ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
383 ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
384 ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
385
386 R300_STATECHANGE(rmesa, vap_flush);
387 R300_STATECHANGE(rmesa, vpp);
388 param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
389 bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
390 param_count /= 4;
391
392 r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
393 inst_count = (prog->code.length / 4) - 1;
394
395 r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
396 _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
397
398 R300_STATECHANGE(rmesa, pvs);
399 rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
400 (inst_count << R300_PVS_LAST_INST_SHIFT);
401
402 rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
403 rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
404 }