c5edbd0052b51e2790cd603c98c16f7bc1f7f18b
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
1 /**************************************************************************
2
3 Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
4 Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 on the rights to use, copy, modify, merge, publish, distribute, sub
12 license, and/or sell copies of the Software, and to permit persons to whom
13 the Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice (including the next
16 paragraph) shall be included in all copies or substantial portions of the
17 Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
25 USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **************************************************************************/
28
29 /* Radeon R5xx Acceleration, Revision 1.2 */
30
31 #include "main/glheader.h"
32 #include "main/macros.h"
33 #include "main/enums.h"
34 #include "shader/program.h"
35 #include "shader/programopt.h"
36 #include "shader/prog_instruction.h"
37 #include "shader/prog_optimize.h"
38 #include "shader/prog_parameter.h"
39 #include "shader/prog_print.h"
40 #include "shader/prog_statevars.h"
41 #include "tnl/tnl.h"
42
43 #include "compiler/radeon_compiler.h"
44 #include "compiler/radeon_nqssadce.h"
45 #include "r300_context.h"
46 #include "r300_state.h"
47
48 /**
49 * Write parameter array for the given vertex program into dst.
50 * Return the total number of components written.
51 */
52 static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
53 {
54 int i;
55
56 if (vp->Base->IsNVProgram) {
57 _mesa_load_tracked_matrices(ctx);
58 } else {
59 if (vp->Base->Base.Parameters) {
60 _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
61 }
62 }
63
64 if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) {
65 /* Should have checked this earlier... */
66 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
67 _mesa_exit(-1);
68 }
69
70 for(i = 0; i < vp->code.constants.Count; ++i) {
71 const float * src = 0;
72 const struct rc_constant * constant = &vp->code.constants.Constants[i];
73
74 switch(constant->Type) {
75 case RC_CONSTANT_EXTERNAL:
76 if (vp->Base->IsNVProgram) {
77 src = ctx->VertexProgram.Parameters[constant->u.External];
78 } else {
79 src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
80 }
81 break;
82
83 case RC_CONSTANT_IMMEDIATE:
84 src = constant->u.Immediate;
85 break;
86 }
87
88 dst[4*i] = src[0];
89 dst[4*i + 1] = src[1];
90 dst[4*i + 2] = src[2];
91 dst[4*i + 3] = src[3];
92 }
93
94 return 4 * vp->code.constants.Count;
95 }
96
97 static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
98 {
99 GLbitfield outputs = 0;
100 int i;
101
102 #define ADD_OUTPUT(fp_attr, vp_result) \
103 do { \
104 if (fpreads & (1 << (fp_attr))) \
105 outputs |= (1 << (vp_result)); \
106 } while (0)
107
108 ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
109 ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
110
111 for (i = 0; i <= 7; ++i) {
112 ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
113 }
114
115 #undef ADD_OUTPUT
116
117 if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
118 (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
119 outputs |= 1 << VERT_RESULT_BFC0;
120 if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
121 (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
122 outputs |= 1 << VERT_RESULT_BFC1;
123
124 outputs |= 1 << VERT_RESULT_HPOS;
125 if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
126 outputs |= 1 << VERT_RESULT_PSIZ;
127
128 return outputs;
129 }
130
131
132 static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
133 {
134 int i;
135 int cur_reg;
136 GLuint OutputsWritten, InputsRead;
137
138 OutputsWritten = c->Base.Program.OutputsWritten;
139 InputsRead = c->Base.Program.InputsRead;
140
141 cur_reg = -1;
142 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
143 if (InputsRead & (1 << i))
144 c->code->inputs[i] = ++cur_reg;
145 else
146 c->code->inputs[i] = -1;
147 }
148
149 cur_reg = 0;
150 for (i = 0; i < VERT_RESULT_MAX; i++)
151 c->code->outputs[i] = -1;
152
153 assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
154
155 if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
156 c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
157 }
158
159 if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
160 c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
161 }
162
163 /* If we're writing back facing colors we need to send
164 * four colors to make front/back face colors selection work.
165 * If the vertex program doesn't write all 4 colors, lets
166 * pretend it does by skipping output index reg so the colors
167 * get written into appropriate output vectors.
168 */
169 if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
170 c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
171 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
172 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
173 cur_reg++;
174 }
175
176 if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
177 c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
178 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
179 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
180 cur_reg++;
181 }
182
183 if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
184 c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
185 } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
186 cur_reg++;
187 }
188
189 if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
190 c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
191 } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
192 cur_reg++;
193 }
194
195 for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
196 if (OutputsWritten & (1 << i)) {
197 c->code->outputs[i] = cur_reg++;
198 }
199 }
200
201 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
202 c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
203 }
204 }
205
206
207 static struct r300_vertex_program *build_program(GLcontext *ctx,
208 struct r300_vertex_program_key *wanted_key,
209 const struct gl_vertex_program *mesa_vp)
210 {
211 struct r300_vertex_program *vp;
212 struct r300_vertex_program_compiler compiler;
213
214 vp = _mesa_calloc(sizeof(*vp));
215 vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base);
216 _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
217
218 rc_init(&compiler.Base);
219 compiler.Base.Debug = (RADEON_DEBUG & DEBUG_VERTS) ? GL_TRUE : GL_FALSE;
220
221 compiler.code = &vp->code;
222 compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
223 compiler.SetHwInputOutput = &t_inputs_outputs;
224
225 if (compiler.Base.Debug) {
226 fprintf(stderr, "Initial vertex program:\n");
227 _mesa_print_program(&vp->Base->Base);
228 fflush(stdout);
229 }
230
231 if (mesa_vp->IsPositionInvariant) {
232 _mesa_insert_mvp_code(ctx, vp->Base);
233 }
234
235 rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
236
237 rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
238
239 if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
240 rc_copy_output(&compiler.Base,
241 VERT_RESULT_HPOS,
242 vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
243 }
244
245 if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
246 rc_move_output(&compiler.Base,
247 VERT_RESULT_FOGC,
248 vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
249 }
250
251 r3xx_compile_vertex_program(&compiler);
252 vp->error = compiler.Base.Error;
253
254 vp->Base->Base.InputsRead = vp->code.InputsRead;
255 vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
256
257 rc_destroy(&compiler.Base);
258
259 return vp;
260 }
261
262 struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
263 {
264 r300ContextPtr r300 = R300_CONTEXT(ctx);
265 struct r300_vertex_program_key wanted_key = { 0 };
266 struct r300_vertex_program_cont *vpc;
267 struct r300_vertex_program *vp;
268
269 vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
270 wanted_key.FpReads = r300->selected_fp->InputsRead;
271 wanted_key.FogAttr = r300->selected_fp->fog_attr;
272 wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
273
274 for (vp = vpc->progs; vp; vp = vp->next) {
275 if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
276 == 0) {
277 return r300->selected_vp = vp;
278 }
279 }
280
281 vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
282 vp->next = vpc->progs;
283 vpc->progs = vp;
284
285 return r300->selected_vp = vp;
286 }
287
/*
 * Raise the vpu.count field of the command header at `ptr` to
 * new_count / 4 if it is currently smaller; the count is never lowered.
 * Asserts that the resulting count is below 256.
 */
#define bump_vpu_count(ptr, new_count) do { \
		drm_r300_cmd_header_t *hdr_ = (drm_r300_cmd_header_t *)(ptr); \
		int vec_count_ = (new_count) / 4; \
		assert(vec_count_ < 256); \
		if (hdr_->vpu.count < vec_count_) \
			hdr_->vpu.count = vec_count_; \
	} while (0)
294
295 static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
296 {
297 int i;
298
299 assert((code->length > 0) && (code->length % 4 == 0));
300
301 switch ((dest >> 8) & 0xf) {
302 case 0:
303 R300_STATECHANGE(r300, vpi);
304 for (i = 0; i < code->length; i++)
305 r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
306 bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
307 break;
308 case 2:
309 R300_STATECHANGE(r300, vpp);
310 for (i = 0; i < code->length; i++)
311 r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
312 bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
313 break;
314 case 4:
315 R300_STATECHANGE(r300, vps);
316 for (i = 0; i < code->length; i++)
317 r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
318 bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
319 break;
320 default:
321 fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
322 _mesa_exit(-1);
323 }
324 }
325
326 void r300SetupVertexProgram(r300ContextPtr rmesa)
327 {
328 GLcontext *ctx = rmesa->radeon.glCtx;
329 struct r300_vertex_program *prog = rmesa->selected_vp;
330 int inst_count = 0;
331 int param_count = 0;
332
333 /* Reset state, in case we don't use something */
334 ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
335 ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
336 ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
337
338 R300_STATECHANGE(rmesa, vpp);
339 param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
340 bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
341 param_count /= 4;
342
343 r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
344 inst_count = (prog->code.length / 4) - 1;
345
346 r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
347 _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
348
349 R300_STATECHANGE(rmesa, pvs);
350 rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
351 (inst_count << R300_PVS_LAST_INST_SHIFT);
352
353 rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
354 rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
355 }