9ee26286d9b390b7a80efc232fee592c8bd85036
[mesa.git] / src / mesa / drivers / dri / r600 / r700_vertprog.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "main/mtypes.h"
36
37 #include "tnl/t_context.h"
38 #include "shader/program.h"
39 #include "shader/prog_parameter.h"
40 #include "shader/prog_statevars.h"
41
42 #include "radeon_debug.h"
43 #include "r600_context.h"
44 #include "r600_cmdbuf.h"
45 #include "shader/programopt.c"
46
47 #include "r700_debug.h"
48 #include "r700_vertprog.h"
49
50 unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
51 struct gl_vertex_program *mesa_vp,
52 unsigned int unStart)
53 {
54 unsigned int i;
55 unsigned int unBit;
56 unsigned int unTotal = unStart;
57
58 //!!!!!!! THE ORDER MATCH FS INPUT
59
60 unBit = 1 << VERT_RESULT_HPOS;
61 if(mesa_vp->Base.OutputsWritten & unBit)
62 {
63 pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++;
64 }
65
66 unBit = 1 << VERT_RESULT_COL0;
67 if(mesa_vp->Base.OutputsWritten & unBit)
68 {
69 pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
70 }
71
72 unBit = 1 << VERT_RESULT_COL1;
73 if(mesa_vp->Base.OutputsWritten & unBit)
74 {
75 pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
76 }
77
78 //TODO : dealing back face.
79 unBit = 1 << VERT_RESULT_BFC0;
80 if(mesa_vp->Base.OutputsWritten & unBit)
81 {
82 pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++;
83 }
84
85 unBit = 1 << VERT_RESULT_BFC1;
86 if(mesa_vp->Base.OutputsWritten & unBit)
87 {
88 pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++;
89 }
90
91 //TODO : dealing fog.
92 unBit = 1 << VERT_RESULT_FOGC;
93 if(mesa_vp->Base.OutputsWritten & unBit)
94 {
95 pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
96 }
97
98 //TODO : dealing point size.
99 unBit = 1 << VERT_RESULT_PSIZ;
100 if(mesa_vp->Base.OutputsWritten & unBit)
101 {
102 pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
103 }
104
105 for(i=0; i<8; i++)
106 {
107 unBit = 1 << (VERT_RESULT_TEX0 + i);
108 if(mesa_vp->Base.OutputsWritten & unBit)
109 {
110 pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++;
111 }
112 }
113
114 return (unTotal - unStart);
115 }
116
117 unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm,
118 struct gl_vertex_program *mesa_vp,
119 unsigned int unStart)
120 {
121 int i;
122 unsigned int unBit;
123 unsigned int unTotal = unStart;
124 for(i=0; i<VERT_ATTRIB_MAX; i++)
125 {
126 unBit = 1 << i;
127 if(mesa_vp->Base.InputsRead & unBit)
128 {
129 pAsm->ucVP_AttributeMap[i] = unTotal++;
130 }
131 }
132 return (unTotal - unStart);
133 }
134
135 GLboolean Process_Vertex_Program_Vfetch_Instructions(
136 struct r700_vertex_program *vp,
137 struct gl_vertex_program *mesa_vp)
138 {
139 int i;
140 unsigned int unBit;
141 VTX_FETCH_METHOD vtxFetchMethod;
142 vtxFetchMethod.bEnableMini = GL_FALSE;
143 vtxFetchMethod.mega_fetch_remainder = 0;
144
145 for(i=0; i<VERT_ATTRIB_MAX; i++)
146 {
147 unBit = 1 << i;
148 if(mesa_vp->Base.InputsRead & unBit)
149 {
150 assemble_vfetch_instruction(&vp->r700AsmCode,
151 i,
152 vp->r700AsmCode.ucVP_AttributeMap[i],
153 vp->aos_desc[i].size,
154 vp->aos_desc[i].type,
155 &vtxFetchMethod);
156 }
157 }
158
159 return GL_TRUE;
160 }
161
162 void Map_Vertex_Program(struct r700_vertex_program *vp,
163 struct gl_vertex_program *mesa_vp)
164 {
165 GLuint ui;
166 r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
167 unsigned int num_inputs;
168
169 // R0 will always be used for index into vertex buffer
170 pAsm->number_used_registers = 1;
171 pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
172
173 // Map Inputs: Add 1 to mapping since R0 is used for index
174 num_inputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
175 pAsm->number_used_registers += num_inputs;
176
177 // Create VFETCH instructions for inputs
178 if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) )
179 {
180 radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n");
181 return; //error
182 }
183
184 // Map Outputs
185 pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
186
187 pAsm->starting_export_register_number = pAsm->number_used_registers;
188
189 pAsm->number_used_registers += pAsm->number_of_exports;
190
191 pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
192
193 for(ui=0; ui<pAsm->number_of_exports; ui++)
194 {
195 pAsm->pucOutMask[ui] = 0x0;
196 }
197
198 /* Map temporary registers (GPRs) */
199 pAsm->starting_temp_register_number = pAsm->number_used_registers;
200
201 if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
202 { /* arb uses NumNativeTemporaries */
203 pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
204 }
205 else
206 { /* fix func t_vp uses NumTemporaries */
207 pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
208 }
209
210 pAsm->uFirstHelpReg = pAsm->number_used_registers;
211 }
212
213 GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
214 struct gl_vertex_program *mesa_vp)
215 {
216 GLuint i, j;
217 GLint * puiTEMPwrites;
218 struct prog_instruction *pILInst;
219 InstDeps *pInstDeps;
220
221 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries);
222 for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
223 {
224 puiTEMPwrites[i] = -1;
225 }
226
227 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
228
229 for(i=0; i<mesa_vp->Base.NumInstructions; i++)
230 {
231 pInstDeps[i].nDstDep = -1;
232 pILInst = &(mesa_vp->Base.Instructions[i]);
233
234 //Dst
235 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
236 {
237 //Set lastwrite for the temp
238 puiTEMPwrites[pILInst->DstReg.Index] = i;
239 }
240
241 //Src
242 for(j=0; j<3; j++)
243 {
244 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
245 {
246 //Set dep.
247 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
248 }
249 else
250 {
251 pInstDeps[i].nSrcDeps[j] = -1;
252 }
253 }
254 }
255
256 vp->r700AsmCode.pInstDeps = pInstDeps;
257
258 FREE(puiTEMPwrites);
259
260 return GL_TRUE;
261 }
262
263 struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
264 struct gl_vertex_program *mesa_vp)
265 {
266 context_t *context = R700_CONTEXT(ctx);
267 struct r700_vertex_program *vp;
268 TNLcontext *tnl = TNL_CONTEXT(ctx);
269 struct vertex_buffer *vb = &tnl->vb;
270 unsigned int unBit;
271 unsigned int i;
272
273 vp = _mesa_calloc(sizeof(*vp));
274 vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base);
275
276 if (mesa_vp->IsPositionInvariant)
277 {
278 _mesa_insert_mvp_code(ctx, vp->mesa_program);
279 }
280
281 for(i=0; i<VERT_ATTRIB_MAX; i++)
282 {
283 unBit = 1 << i;
284 if(vp->mesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */
285 {
286 vp->aos_desc[i].size = vb->AttribPtr[i]->size;
287 vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/
288 vp->aos_desc[i].type = GL_FLOAT;
289 }
290 }
291
292 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
293 {
294 vp->r700AsmCode.bR6xx = 1;
295 }
296
297 //Init_Program
298 Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
299 Map_Vertex_Program( vp, vp->mesa_program );
300
301 if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
302 {
303 return NULL;
304 }
305
306 if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions,
307 &(vp->mesa_program->Base.Instructions[0]),
308 &(vp->r700AsmCode)) )
309 {
310 return NULL;
311 }
312
313 if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
314 {
315 return NULL;
316 }
317
318 vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
319 : (vp->r700AsmCode.number_used_registers - 1);
320
321 vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;
322
323 vp->translated = GL_TRUE;
324
325 return vp;
326 }
327
328 void r700SelectVertexShader(GLcontext *ctx)
329 {
330 context_t *context = R700_CONTEXT(ctx);
331 struct r700_vertex_program_cont *vpc;
332 struct r700_vertex_program *vp;
333 TNLcontext *tnl = TNL_CONTEXT(ctx);
334 struct vertex_buffer *vb = &tnl->vb;
335 unsigned int unBit;
336 unsigned int i;
337 GLboolean match;
338 GLbitfield InputsRead;
339
340 vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
341
342 InputsRead = vpc->mesa_program.Base.InputsRead;
343 if (vpc->mesa_program.IsPositionInvariant)
344 {
345 InputsRead |= VERT_BIT_POS;
346 }
347
348 for (vp = vpc->progs; vp; vp = vp->next)
349 {
350 match = GL_TRUE;
351 for(i=0; i<VERT_ATTRIB_MAX; i++)
352 {
353 unBit = 1 << i;
354 if(InputsRead & unBit)
355 {
356 if (vp->aos_desc[i].size != vb->AttribPtr[i]->size)
357 match = GL_FALSE;
358 break;
359 }
360 }
361 if (match)
362 {
363 context->selected_vp = vp;
364 return;
365 }
366 }
367
368 vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) );
369 if(!vp)
370 {
371 radeon_error("Failed to translate vertex shader. \n");
372 return;
373 }
374 vp->next = vpc->progs;
375 vpc->progs = vp;
376 context->selected_vp = vp;
377 return;
378 }
379
380 void * r700GetActiveVpShaderBo(GLcontext * ctx)
381 {
382 context_t *context = R700_CONTEXT(ctx);
383 struct r700_vertex_program *vp = context->selected_vp;;
384
385 if (vp)
386 return vp->shaderbo;
387 else
388 return NULL;
389 }
390
391 GLboolean r700SetupVertexProgram(GLcontext * ctx)
392 {
393 context_t *context = R700_CONTEXT(ctx);
394 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
395 struct r700_vertex_program *vp = context->selected_vp;
396
397 struct gl_program_parameter_list *paramList;
398 unsigned int unNumParamData;
399 unsigned int ui;
400
401 if(GL_FALSE == vp->loaded)
402 {
403 if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
404 {
405 Assemble( &(vp->r700Shader) );
406 }
407
408 /* Load vp to gpu */
409 r600EmitShader(ctx,
410 &(vp->shaderbo),
411 (GLvoid *)(vp->r700Shader.pProgram),
412 vp->r700Shader.uShaderBinaryDWORDSize,
413 "VS");
414
415 vp->loaded = GL_TRUE;
416 }
417
418 DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
419 vp->r700Shader.uShaderBinaryDWORDSize);
420
421 /* TODO : enable this after MemUse fixed *=
422 (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
423 */
424
425 R600_STATECHANGE(context, vs);
426 R600_STATECHANGE(context, fs); /* hack */
427
428 r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
429 SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
430
431 r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */
432
433 SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
434 NUM_GPRS_shift, NUM_GPRS_mask);
435
436 if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
437 {
438 SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
439 STACK_SIZE_shift, STACK_SIZE_mask);
440 }
441
442 R600_STATECHANGE(context, spi);
443
444 SETfield(r700->SPI_VS_OUT_CONFIG.u32All,
445 vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
446 VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
447 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
448 NUM_INTERP_shift, NUM_INTERP_mask);
449
450 /*
451 SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
452 CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
453 */
454
455 /* sent out shader constants. */
456 paramList = vp->mesa_program->Base.Parameters;
457
458 if(NULL != paramList) {
459 _mesa_load_state_parameters(ctx, paramList);
460
461 if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
462 return GL_FALSE;
463
464 R600_STATECHANGE(context, vs_consts);
465
466 r700->vs.num_consts = paramList->NumParameters;
467
468 unNumParamData = paramList->NumParameters;
469
470 for(ui=0; ui<unNumParamData; ui++) {
471 r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
472 r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
473 r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
474 r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
475 }
476 } else
477 r700->vs.num_consts = 0;
478
479 return GL_TRUE;
480 }