Fix r6 code bugs.
[mesa.git] / src / mesa / drivers / dri / r600 / r700_vertprog.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "main/mtypes.h"
36
37 #include "tnl/t_context.h"
38 #include "shader/prog_parameter.h"
39 #include "shader/prog_statevars.h"
40
41 #include "r600_context.h"
42 #include "r600_cmdbuf.h"
43
44 #include "r700_chip.h"
45 #include "r700_debug.h"
46 #include "r700_vertprog.h"
47
48 unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
49 struct gl_vertex_program *mesa_vp,
50 unsigned int unStart)
51 {
52 unsigned int i;
53 unsigned int unBit;
54 unsigned int unTotal = unStart;
55
56 //!!!!!!! THE ORDER MATCH FS INPUT
57
58 unBit = 1 << VERT_RESULT_HPOS;
59 if(mesa_vp->Base.OutputsWritten & unBit)
60 {
61 pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++;
62 }
63
64 unBit = 1 << VERT_RESULT_COL0;
65 if(mesa_vp->Base.OutputsWritten & unBit)
66 {
67 pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
68 }
69
70 unBit = 1 << VERT_RESULT_COL1;
71 if(mesa_vp->Base.OutputsWritten & unBit)
72 {
73 pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
74 }
75
76 //TODO : dealing back face.
77 //unBit = 1 << VERT_RESULT_BFC0;
78 //if(mesa_vp->Base.OutputsWritten & unBit)
79 //{
80 // pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
81 //}
82
83 //unBit = 1 << VERT_RESULT_BFC1;
84 //if(mesa_vp->Base.OutputsWritten & unBit)
85 //{
86 // pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
87 //}
88
89 //TODO : dealing fog.
90 //unBit = 1 << VERT_RESULT_FOGC;
91 //if(mesa_vp->Base.OutputsWritten & unBit)
92 //{
93 // pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
94 //}
95
96 //TODO : dealing point size.
97 //unBit = 1 << VERT_RESULT_PSIZ;
98 //if(mesa_vp->Base.OutputsWritten & unBit)
99 //{
100 // pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
101 //}
102
103 for(i=0; i<8; i++)
104 {
105 unBit = 1 << (VERT_RESULT_TEX0 + i);
106 if(mesa_vp->Base.OutputsWritten & unBit)
107 {
108 pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++;
109 }
110 }
111
112 return (unTotal - unStart);
113 }
114
115 unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm,
116 struct gl_vertex_program *mesa_vp,
117 unsigned int unStart)
118 {
119 int i;
120 unsigned int unBit;
121 unsigned int unTotal = unStart;
122 for(i=0; i<VERT_ATTRIB_MAX; i++)
123 {
124 unBit = 1 << i;
125 if(mesa_vp->Base.InputsRead & unBit)
126 {
127 pAsm->ucVP_AttributeMap[i] = unTotal++;
128 }
129 }
130 return (unTotal - unStart);
131 }
132
133 GLboolean Process_Vertex_Program_Vfetch_Instructions(
134 struct r700_vertex_program *vp,
135 struct gl_vertex_program *mesa_vp)
136 {
137 int i;
138 unsigned int unBit;
139 VTX_FETCH_METHOD vtxFetchMethod;
140 vtxFetchMethod.bEnableMini = GL_FALSE;
141 vtxFetchMethod.mega_fetch_remainder = 0;
142
143 for(i=0; i<VERT_ATTRIB_MAX; i++)
144 {
145 unBit = 1 << i;
146 if(mesa_vp->Base.InputsRead & unBit)
147 {
148 assemble_vfetch_instruction(&vp->r700AsmCode,
149 i,
150 vp->r700AsmCode.ucVP_AttributeMap[i],
151 vp->aos_desc[i].size,
152 vp->aos_desc[i].type,
153 &vtxFetchMethod);
154 }
155 }
156
157 return GL_TRUE;
158 }
159
160 void Map_Vertex_Program(struct r700_vertex_program *vp,
161 struct gl_vertex_program *mesa_vp)
162 {
163 GLuint ui;
164 r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
165 unsigned int num_inputs;
166
167 // R0 will always be used for index into vertex buffer
168 pAsm->number_used_registers = 1;
169 pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
170
171 // Map Inputs: Add 1 to mapping since R0 is used for index
172 num_inputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
173 pAsm->number_used_registers += num_inputs;
174
175 // Create VFETCH instructions for inputs
176 if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) )
177 {
178 r700_error(ERROR_ASM_VTX_CLAUSE, "Calling Process_Vertex_Program_Vfetch_Instructions return error. \n");
179 return; //error
180 }
181
182 // Map Outputs
183 pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
184
185 pAsm->starting_export_register_number = pAsm->number_used_registers;
186
187 pAsm->number_used_registers += pAsm->number_of_exports;
188
189 pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
190
191 for(ui=0; ui<pAsm->number_of_exports; ui++)
192 {
193 pAsm->pucOutMask[ui] = 0x0;
194 }
195
196 /* Map temporary registers (GPRs) */
197 pAsm->starting_temp_register_number = pAsm->number_used_registers;
198
199 if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
200 { /* arb uses NumNativeTemporaries */
201 pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
202 }
203 else
204 { /* fix func t_vp uses NumTemporaries */
205 pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
206 }
207
208 pAsm->uFirstHelpReg = pAsm->number_used_registers;
209 }
210
211 GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
212 struct gl_vertex_program *mesa_vp)
213 {
214 GLuint i, j;
215 GLint * puiTEMPwrites;
216 struct prog_instruction *pILInst;
217 InstDeps *pInstDeps;
218
219 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries);
220 for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
221 {
222 puiTEMPwrites[i] = -1;
223 }
224
225 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
226
227 for(i=0; i<mesa_vp->Base.NumInstructions; i++)
228 {
229 pInstDeps[i].nDstDep = -1;
230 pILInst = &(mesa_vp->Base.Instructions[i]);
231
232 //Dst
233 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
234 {
235 //Set lastwrite for the temp
236 puiTEMPwrites[pILInst->DstReg.Index] = i;
237 }
238
239 //Src
240 for(j=0; j<3; j++)
241 {
242 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
243 {
244 //Set dep.
245 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
246 }
247 else
248 {
249 pInstDeps[i].nSrcDeps[j] = -1;
250 }
251 }
252 }
253
254 vp->r700AsmCode.pInstDeps = pInstDeps;
255
256 FREE(puiTEMPwrites);
257
258 return GL_TRUE;
259 }
260
261 GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp,
262 struct gl_vertex_program *mesa_vp)
263 {
264 //Init_Program
265 Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
266 Map_Vertex_Program( vp, mesa_vp );
267
268 if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, mesa_vp))
269 {
270 return GL_FALSE;
271 }
272
273 if(GL_FALSE == AssembleInstr(mesa_vp->Base.NumInstructions,
274 &(mesa_vp->Base.Instructions[0]),
275 &(vp->r700AsmCode)) )
276 {
277 return GL_FALSE;
278 }
279
280 if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), mesa_vp->Base.OutputsWritten) )
281 {
282 return GL_FALSE;
283 }
284
285 vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
286 : (vp->r700AsmCode.number_used_registers - 1);
287
288 vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;
289
290 vp->translated = GL_TRUE;
291
292 return GL_TRUE;
293 }
294
295 void r700SelectVertexShader(GLcontext *ctx)
296 {
297 context_t *context = R700_CONTEXT(ctx);
298 struct r700_vertex_program *vpc
299 = (struct r700_vertex_program *)ctx->VertexProgram._Current;
300 if (context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV670)
301 {
302 vpc->r700AsmCode.bR6xx = 1;
303 }
304
305 TNLcontext *tnl = TNL_CONTEXT(ctx);
306 struct vertex_buffer *vb = &tnl->vb;
307
308 unsigned int unBit;
309 unsigned int i;
310 for(i=0; i<VERT_ATTRIB_MAX; i++)
311 {
312 unBit = 1 << i;
313 if(vpc->mesa_program.Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */
314 {
315 vpc->aos_desc[i].size = vb->AttribPtr[i]->size;
316 vpc->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/
317 vpc->aos_desc[i].type = GL_FLOAT;
318 }
319 }
320
321 if(GL_FALSE == vpc->translated)
322 {
323 r700TranslateVertexShader(vpc,
324 &(vpc->mesa_program) );
325 }
326 }
327
328 void * r700GetActiveVpShaderBo(GLcontext * ctx)
329 {
330 struct r700_vertex_program *vp
331 = (struct r700_vertex_program *)ctx->VertexProgram._Current;
332
333 return vp->shaderbo;
334 }
335
336 GLboolean r700SetupVertexProgram(GLcontext * ctx)
337 {
338 context_t *context = R700_CONTEXT(ctx);
339
340 BATCH_LOCALS(&context->radeon);
341
342 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
343
344 struct r700_vertex_program *vp
345 = (struct r700_vertex_program *)ctx->VertexProgram._Current;
346
347 struct gl_program_parameter_list *paramList;
348 unsigned int unNumParamData;
349
350 unsigned int ui;
351
352 if(GL_FALSE == vp->loaded)
353 {
354 if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
355 {
356 Assemble( &(vp->r700Shader) );
357 }
358
359 /* Load vp to gpu */
360 (context->chipobj.EmitShader)(ctx,
361 &(vp->shaderbo),
362 (GLvoid *)(vp->r700Shader.pProgram),
363 vp->r700Shader.uShaderBinaryDWORDSize,
364 "VS");
365
366 vp->loaded = GL_TRUE;
367 }
368
369 DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
370 vp->r700Shader.uShaderBinaryDWORDSize);
371
372 /* TODO : enable this after MemUse fixed *=
373 (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
374 */
375
376 r700->SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */
377
378 SETfield(r700->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
379 NUM_GPRS_shift, NUM_GPRS_mask);
380
381 if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
382 {
383 SETfield(r700->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
384 STACK_SIZE_shift, STACK_SIZE_mask);
385 }
386
387 SETfield(r700->SPI_VS_OUT_CONFIG.u32All, vp->r700Shader.nParamExports - 1,
388 VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
389 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
390 NUM_INTERP_shift, NUM_INTERP_mask);
391
392 /*
393 SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
394 CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
395 */
396
397 /* sent out shader constants. */
398
399 paramList = vp->mesa_program.Base.Parameters;
400
401 if(NULL != paramList)
402 {
403 _mesa_load_state_parameters(ctx, paramList);
404
405 unNumParamData = paramList->NumParameters * 4;
406
407 BEGIN_BATCH_NO_AUTOSTATE(unNumParamData + 2);
408
409 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData));
410 /* assembler map const from very beginning. */
411 R600_OUT_BATCH(SQ_ALU_CONSTANT_VS_OFFSET * 4);
412
413 unNumParamData = paramList->NumParameters;
414
415 for(ui=0; ui<unNumParamData; ui++)
416 {
417 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][0])));
418 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][1])));
419 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][2])));
420 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][3])));
421 }
422 END_BATCH();
423 COMMIT_BATCH();
424 }
425
426 return GL_TRUE;
427 }
428
429
430
431