Merge branch 'mesa_7_5_branch' into mesa_7_6_branch
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/programopt.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_optimize.h"
38 #include "shader/prog_parameter.h"
39 #include "shader/prog_print.h"
40 #include "shader/prog_statevars.h"
41 #include "tnl/tnl.h"
42
43 #include "compiler/radeon_compiler.h"
44 #include "compiler/radeon_nqssadce.h"
45 #include "r300_context.h"
46 #include "r300_state.h"
47
48 /**
49 * Write parameter array for the given vertex program into dst.
50 * Return the total number of components written.
51 */
52 static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
53 {
54 int i;
55
56 if (vp->Base->IsNVProgram) {
57 _mesa_load_tracked_matrices(ctx);
58 } else {
59 if (vp->Base->Base.Parameters) {
60 _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
61 }
62 }
63
64 if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) {
65 /* Should have checked this earlier... */
66 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
67 _mesa_exit(-1);
68 }
69
70 for(i = 0; i < vp->code.constants.Count; ++i) {
71 const float * src = 0;
72 const struct rc_constant * constant = &vp->code.constants.Constants[i];
73
74 switch(constant->Type) {
75 case RC_CONSTANT_EXTERNAL:
76 if (vp->Base->IsNVProgram) {
77 src = ctx->VertexProgram.Parameters[constant->u.External];
78 } else {
79 src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
80 }
81 break;
82
83 case RC_CONSTANT_IMMEDIATE:
84 src = constant->u.Immediate;
85 break;
86 }
87
88 dst[4*i] = src[0];
89 dst[4*i + 1] = src[1];
90 dst[4*i + 2] = src[2];
91 dst[4*i + 3] = src[3];
92 }
93
94 return 4 * vp->code.constants.Count;
95 }
96
97 static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
98 {
99 GLbitfield outputs = 0;
100 int i;
101
102 #define ADD_OUTPUT(fp_attr, vp_result) \
103 do { \
104 if (fpreads & (1 << (fp_attr))) \
105 outputs |= (1 << (vp_result)); \
106 } while (0)
107
108 ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
109 ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
110
111 for (i = 0; i <= 7; ++i) {
112 ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
113 }
114
115 #undef ADD_OUTPUT
116
117 if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
118 (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
119 outputs |= 1 << VERT_RESULT_BFC0;
120 if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
121 (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
122 outputs |= 1 << VERT_RESULT_BFC1;
123
124 outputs |= 1 << VERT_RESULT_HPOS;
125 if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
126 outputs |= 1 << VERT_RESULT_PSIZ;
127
128 return outputs;
129 }
130
131
132 static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
133 {
134 int i;
135 int cur_reg;
136 GLuint OutputsWritten, InputsRead;
137
138 OutputsWritten = c->Base.Program.OutputsWritten;
139 InputsRead = c->Base.Program.InputsRead;
140
141 cur_reg = -1;
142 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
143 if (InputsRead & (1 << i))
144 c->code->inputs[i] = ++cur_reg;
145 else
146 c->code->inputs[i] = -1;
147 }
148
149 cur_reg = 0;
150 for (i = 0; i < VERT_RESULT_MAX; i++)
151 c->code->outputs[i] = -1;
152
153 assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
154
155 if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
156 c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
157 }
158
159 if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
160 c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
161 }
162
163 /* If we're writing back facing colors we need to send
164 * four colors to make front/back face colors selection work.
165 * If the vertex program doesn't write all 4 colors, lets
166 * pretend it does by skipping output index reg so the colors
167 * get written into appropriate output vectors.
168 */
169 if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
170 c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
171 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
172 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
173 cur_reg++;
174 }
175
176 if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
177 c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
178 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
179 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
180 cur_reg++;
181 }
182
183 if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
184 c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
185 } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
186 cur_reg++;
187 }
188
189 if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
190 c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
191 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
192 cur_reg++;
193 }
194
195 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
196 if (OutputsWritten & (1 << i)) {
197 c->code->outputs[i] = cur_reg++;
198 }
199 }
200
201 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
202 c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
203 }
204 }
205
206 /**
207 * The NV_vertex_program spec mandates that all registers be
208 * initialized to zero. We do this here unconditionally.
209 *
210 * \note We rely on dead-code elimination in the compiler.
211 */
212 static void initialize_NV_registers(struct radeon_compiler * compiler)
213 {
214 unsigned int reg;
215 struct rc_instruction * inst;
216
217 for(reg = 0; reg < 12; ++reg) {
218 inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
219 inst->I.Opcode = OPCODE_MOV;
220 inst->I.DstReg.File = PROGRAM_TEMPORARY;
221 inst->I.DstReg.Index = reg;
222 inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
223 inst->I.SrcReg[0].Swizzle = SWIZZLE_0000;
224 }
225
226 inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
227 inst->I.Opcode = OPCODE_ARL;
228 inst->I.DstReg.File = PROGRAM_ADDRESS;
229 inst->I.DstReg.Index = 0;
230 inst->I.DstReg.WriteMask = WRITEMASK_X;
231 inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
232 inst->I.SrcReg[0].Swizzle = SWIZZLE_0000;
233 }
234
235 static struct r300_vertex_program *build_program(GLcontext *ctx,
236 struct r300_vertex_program_key *wanted_key,
237 const struct gl_vertex_program *mesa_vp)
238 {
239 struct r300_vertex_program *vp;
240 struct r300_vertex_program_compiler compiler;
241
242 vp = _mesa_calloc(sizeof(*vp));
243 vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
244 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
245
246 rc_init(&compiler.Base);
247 compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;
248
249 compiler.code = &vp->code;
250 compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
251 compiler.SetHwInputOutput = &t_inputs_outputs;
252
253 if (compiler.Base.Debug) {
254 fprintf(stderr, "Initial vertex program:\n");
255 _mesa_print_program(&vp->Base->Base);
256 fflush(stderr);
257 }
258
259 if (mesa_vp->IsPositionInvariant) {
260 _mesa_insert_mvp_code(ctx, vp->Base);
261 }
262
263 rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
264
265 if (mesa_vp->IsNVProgram)
266 initialize_NV_registers(&compiler.Base);
267
268 rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
269
270 if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
271 rc_copy_output(&compiler.Base,
272 VERT_RESULT_HPOS,
273 vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
274 }
275
276 if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
277 rc_move_output(&compiler.Base,
278 VERT_RESULT_FOGC,
279 vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
280 }
281
282 r3xx_compile_vertex_program(&compiler);
283 vp->error = compiler.Base.Error;
284
285 vp->Base->Base.InputsRead = vp->code.InputsRead;
286 vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
287
288 rc_destroy(&compiler.Base);
289
290 return vp;
291 }
292
293 struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
294 {
295 r300ContextPtr r300 = R300_CONTEXT(ctx);
296 struct r300_vertex_program_key wanted_key = { 0 };
297 struct r300_vertex_program_cont *vpc;
298 struct r300_vertex_program *vp;
299
300 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
301 wanted_key.FpReads = r300->selected_fp->InputsRead;
302 wanted_key.FogAttr = r300->selected_fp->fog_attr;
303 wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
304
305 for (vp = vpc->progs; vp; vp = vp->next) {
306 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
307 == 0) {
308 return r300->selected_vp = vp;
309 }
310 }
311
312 vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
313 vp->next = vpc->progs;
314 vpc->progs = vp;
315
316 return r300->selected_vp = vp;
317 }
318
/*
 * Raise the vpu.count field of the command header at ptr to
 * new_count / 4 if that is larger than the current value; the count is
 * never lowered.  The resulting count must be below 256.
 */
#define bump_vpu_count(ptr, new_count) do { \
		drm_r300_cmd_header_t *hdr_ = (drm_r300_cmd_header_t *)(ptr); \
		int vec_count_ = (new_count) / 4; \
		assert(vec_count_ < 256); \
		if (vec_count_ > hdr_->vpu.count) { \
			hdr_->vpu.count = vec_count_; \
		} \
	} while (0)
325
326 static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
327 {
328 int i;
329
330 assert((code->length > 0) && (code->length % 4 == 0));
331
332 R300_STATECHANGE( r300, vap_flush );
333
334 switch ((dest >> 8) & 0xf) {
335 case 0:
336 R300_STATECHANGE(r300, vpi);
337 for (i = 0; i < code->length; i++)
338 r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
339 bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
340 break;
341 case 2:
342 R300_STATECHANGE(r300, vpp);
343 for (i = 0; i < code->length; i++)
344 r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
345 bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
346 break;
347 case 4:
348 R300_STATECHANGE(r300, vps);
349 for (i = 0; i < code->length; i++)
350 r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
351 bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
352 break;
353 default:
354 fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
355 _mesa_exit(-1);
356 }
357 }
358
359 void r300SetupVertexProgram(r300ContextPtr rmesa)
360 {
361 GLcontext *ctx = rmesa->radeon.glCtx;
362 struct r300_vertex_program *prog = rmesa->selected_vp;
363 int inst_count = 0;
364 int param_count = 0;
365
366 /* Reset state, in case we don't use something */
367 ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
368 ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
369 ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
370
371 R300_STATECHANGE(rmesa, vap_flush);
372 R300_STATECHANGE(rmesa, vpp);
373 param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
374 bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
375 param_count /= 4;
376
377 r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
378 inst_count = (prog->code.length / 4) - 1;
379
380 r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
381 _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
382
383 R300_STATECHANGE(rmesa, pvs);
384 rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
385 (inst_count << R300_PVS_LAST_INST_SHIFT);
386
387 rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
388 rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
389 }