r600: remove remains of old tnl pipeline
[mesa.git] / src / mesa / drivers / dri / r600 / r700_vertprog.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "main/mtypes.h"
36
37 #include "tnl/t_context.h"
38 #include "shader/program.h"
39 #include "shader/prog_parameter.h"
40 #include "shader/prog_statevars.h"
41
42 #include "radeon_debug.h"
43 #include "r600_context.h"
44 #include "r600_cmdbuf.h"
45 #include "shader/programopt.c"
46
47 #include "r700_debug.h"
48 #include "r700_vertprog.h"
49
50 unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
51 struct gl_vertex_program *mesa_vp,
52 unsigned int unStart)
53 {
54 unsigned int i;
55 unsigned int unBit;
56 unsigned int unTotal = unStart;
57
58 //!!!!!!! THE ORDER MATCH FS INPUT
59
60 unBit = 1 << VERT_RESULT_HPOS;
61 if(mesa_vp->Base.OutputsWritten & unBit)
62 {
63 pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++;
64 }
65
66 unBit = 1 << VERT_RESULT_COL0;
67 if(mesa_vp->Base.OutputsWritten & unBit)
68 {
69 pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
70 }
71
72 unBit = 1 << VERT_RESULT_COL1;
73 if(mesa_vp->Base.OutputsWritten & unBit)
74 {
75 pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
76 }
77
78 //TODO : dealing back face.
79 unBit = 1 << VERT_RESULT_BFC0;
80 if(mesa_vp->Base.OutputsWritten & unBit)
81 {
82 pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++;
83 }
84
85 unBit = 1 << VERT_RESULT_BFC1;
86 if(mesa_vp->Base.OutputsWritten & unBit)
87 {
88 pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++;
89 }
90
91 //TODO : dealing fog.
92 unBit = 1 << VERT_RESULT_FOGC;
93 if(mesa_vp->Base.OutputsWritten & unBit)
94 {
95 pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
96 }
97
98 //TODO : dealing point size.
99 unBit = 1 << VERT_RESULT_PSIZ;
100 if(mesa_vp->Base.OutputsWritten & unBit)
101 {
102 pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
103 }
104
105 for(i=0; i<8; i++)
106 {
107 unBit = 1 << (VERT_RESULT_TEX0 + i);
108 if(mesa_vp->Base.OutputsWritten & unBit)
109 {
110 pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++;
111 }
112 }
113
114 return (unTotal - unStart);
115 }
116
117 unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm,
118 struct gl_vertex_program *mesa_vp,
119 unsigned int unStart)
120 {
121 int i;
122 unsigned int unBit;
123 unsigned int unTotal = unStart;
124 for(i=0; i<VERT_ATTRIB_MAX; i++)
125 {
126 unBit = 1 << i;
127 if(mesa_vp->Base.InputsRead & unBit)
128 {
129 pAsm->ucVP_AttributeMap[i] = unTotal++;
130 }
131 }
132 return (unTotal - unStart);
133 }
134
135 GLboolean Process_Vertex_Program_Vfetch_Instructions(
136 struct r700_vertex_program *vp,
137 struct gl_vertex_program *mesa_vp)
138 {
139 int i;
140 unsigned int unBit;
141 VTX_FETCH_METHOD vtxFetchMethod;
142 vtxFetchMethod.bEnableMini = GL_FALSE;
143 vtxFetchMethod.mega_fetch_remainder = 0;
144
145 for(i=0; i<VERT_ATTRIB_MAX; i++)
146 {
147 unBit = 1 << i;
148 if(mesa_vp->Base.InputsRead & unBit)
149 {
150 assemble_vfetch_instruction(&vp->r700AsmCode,
151 i,
152 vp->r700AsmCode.ucVP_AttributeMap[i],
153 vp->aos_desc[i].size,
154 vp->aos_desc[i].type,
155 &vtxFetchMethod);
156 }
157 }
158
159 return GL_TRUE;
160 }
161
162 GLboolean Process_Vertex_Program_Vfetch_Instructions2(
163 GLcontext *ctx,
164 struct r700_vertex_program *vp,
165 struct gl_vertex_program *mesa_vp)
166 {
167 int i;
168 context_t *context = R700_CONTEXT(ctx);
169
170 VTX_FETCH_METHOD vtxFetchMethod;
171 vtxFetchMethod.bEnableMini = GL_FALSE;
172 vtxFetchMethod.mega_fetch_remainder = 0;
173
174 for(i=0; i<context->nNumActiveAos; i++)
175 {
176 assemble_vfetch_instruction2(&vp->r700AsmCode,
177 vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element],
178 context->stream_desc[i].type,
179 context->stream_desc[i].size,
180 context->stream_desc[i].element,
181 context->stream_desc[i]._signed,
182 context->stream_desc[i].normalize,
183 &vtxFetchMethod);
184 }
185
186 return GL_TRUE;
187 }
188
189 void Map_Vertex_Program(GLcontext *ctx,
190 struct r700_vertex_program *vp,
191 struct gl_vertex_program *mesa_vp)
192 {
193 GLuint ui;
194 r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
195 unsigned int num_inputs;
196
197 // R0 will always be used for index into vertex buffer
198 pAsm->number_used_registers = 1;
199 pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
200
201 // Map Inputs: Add 1 to mapping since R0 is used for index
202 num_inputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
203 pAsm->number_used_registers += num_inputs;
204
205 // Create VFETCH instructions for inputs
206 if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
207 {
208 radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
209 return;
210 }
211
212 // Map Outputs
213 pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
214
215 pAsm->starting_export_register_number = pAsm->number_used_registers;
216
217 pAsm->number_used_registers += pAsm->number_of_exports;
218
219 pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
220
221 for(ui=0; ui<pAsm->number_of_exports; ui++)
222 {
223 pAsm->pucOutMask[ui] = 0x0;
224 }
225
226 /* Map temporary registers (GPRs) */
227 pAsm->starting_temp_register_number = pAsm->number_used_registers;
228
229 if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
230 { /* arb uses NumNativeTemporaries */
231 pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
232 }
233 else
234 { /* fix func t_vp uses NumTemporaries */
235 pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
236 }
237
238 pAsm->uFirstHelpReg = pAsm->number_used_registers;
239 }
240
241 GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
242 struct gl_vertex_program *mesa_vp)
243 {
244 GLuint i, j;
245 GLint * puiTEMPwrites;
246 struct prog_instruction *pILInst;
247 InstDeps *pInstDeps;
248
249 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries);
250 for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
251 {
252 puiTEMPwrites[i] = -1;
253 }
254
255 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
256
257 for(i=0; i<mesa_vp->Base.NumInstructions; i++)
258 {
259 pInstDeps[i].nDstDep = -1;
260 pILInst = &(mesa_vp->Base.Instructions[i]);
261
262 //Dst
263 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
264 {
265 //Set lastwrite for the temp
266 puiTEMPwrites[pILInst->DstReg.Index] = i;
267 }
268
269 //Src
270 for(j=0; j<3; j++)
271 {
272 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
273 {
274 //Set dep.
275 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
276 }
277 else
278 {
279 pInstDeps[i].nSrcDeps[j] = -1;
280 }
281 }
282 }
283
284 vp->r700AsmCode.pInstDeps = pInstDeps;
285
286 FREE(puiTEMPwrites);
287
288 return GL_TRUE;
289 }
290
291 struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
292 struct gl_vertex_program *mesa_vp)
293 {
294 context_t *context = R700_CONTEXT(ctx);
295 struct r700_vertex_program *vp;
296 unsigned int i;
297
298 vp = _mesa_calloc(sizeof(*vp));
299 vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base);
300
301 if (mesa_vp->IsPositionInvariant)
302 {
303 _mesa_insert_mvp_code(ctx, vp->mesa_program);
304 }
305
306 for(i=0; i<context->nNumActiveAos; i++)
307 {
308 vp->aos_desc[i].size = context->stream_desc[i].size;
309 vp->aos_desc[i].stride = context->stream_desc[i].stride;
310 vp->aos_desc[i].type = context->stream_desc[i].type;
311 }
312
313 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
314 {
315 vp->r700AsmCode.bR6xx = 1;
316 }
317
318 //Init_Program
319 Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
320 Map_Vertex_Program(ctx, vp, vp->mesa_program );
321
322 if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
323 {
324 return NULL;
325 }
326
327 if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions,
328 &(vp->mesa_program->Base.Instructions[0]),
329 &(vp->r700AsmCode)) )
330 {
331 return NULL;
332 }
333
334 if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
335 {
336 return NULL;
337 }
338
339 vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
340 : (vp->r700AsmCode.number_used_registers - 1);
341
342 vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;
343
344 vp->translated = GL_TRUE;
345
346 return vp;
347 }
348
349 void r700SelectVertexShader(GLcontext *ctx)
350 {
351 context_t *context = R700_CONTEXT(ctx);
352 struct r700_vertex_program_cont *vpc;
353 struct r700_vertex_program *vp;
354 unsigned int i;
355 GLboolean match;
356 GLbitfield InputsRead;
357
358 vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
359
360 InputsRead = vpc->mesa_program.Base.InputsRead;
361 if (vpc->mesa_program.IsPositionInvariant)
362 {
363 InputsRead |= VERT_BIT_POS;
364 }
365
366 for (vp = vpc->progs; vp; vp = vp->next)
367 {
368 match = GL_TRUE;
369 for(i=0; i<context->nNumActiveAos; i++)
370 {
371 if (vp->aos_desc[i].size != context->stream_desc[i].size)
372 {
373 match = GL_FALSE;
374 break;
375 }
376 }
377 if (match)
378 {
379 context->selected_vp = vp;
380 return;
381 }
382 }
383
384 vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program));
385 if(!vp)
386 {
387 radeon_error("Failed to translate vertex shader. \n");
388 return;
389 }
390 vp->next = vpc->progs;
391 vpc->progs = vp;
392 context->selected_vp = vp;
393 return;
394 }
395
396 int getTypeSize(GLenum type)
397 {
398 switch (type)
399 {
400 case GL_DOUBLE:
401 return sizeof(GLdouble);
402 case GL_FLOAT:
403 return sizeof(GLfloat);
404 case GL_INT:
405 return sizeof(GLint);
406 case GL_UNSIGNED_INT:
407 return sizeof(GLuint);
408 case GL_SHORT:
409 return sizeof(GLshort);
410 case GL_UNSIGNED_SHORT:
411 return sizeof(GLushort);
412 case GL_BYTE:
413 return sizeof(GLbyte);
414 case GL_UNSIGNED_BYTE:
415 return sizeof(GLubyte);
416 default:
417 assert(0);
418 return 0;
419 }
420 }
421
422 static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input)
423 {
424 context_t *context = R700_CONTEXT(ctx);
425
426 StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]);
427
428 GLuint stride;
429
430 stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size
431 : input->StrideB;
432
433 if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
434 #if MESA_BIG_ENDIAN
435 getTypeSize(input->Type) != 4 ||
436 #endif
437 stride < 4)
438 {
439 pStreamDesc->type = GL_FLOAT;
440
441 if (input->StrideB == 0)
442 {
443 pStreamDesc->stride = 0;
444 }
445 else
446 {
447 pStreamDesc->stride = sizeof(GLfloat) * input->Size;
448 }
449 pStreamDesc->dwords = input->Size;
450 pStreamDesc->is_named_bo = GL_FALSE;
451 }
452 else
453 {
454 pStreamDesc->type = input->Type;
455 pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4;
456 if (!input->BufferObj->Name)
457 {
458 if (input->StrideB == 0)
459 {
460 pStreamDesc->stride = 0;
461 }
462 else
463 {
464 pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3;
465 }
466
467 pStreamDesc->is_named_bo = GL_FALSE;
468 }
469 }
470
471 pStreamDesc->size = input->Size;
472 pStreamDesc->dst_loc = context->nNumActiveAos;
473 pStreamDesc->element = unLoc;
474
475 switch (pStreamDesc->type)
476 { //GetSurfaceFormat
477 case GL_FLOAT:
478 pStreamDesc->_signed = 0;
479 pStreamDesc->normalize = GL_FALSE;
480 break;
481 case GL_SHORT:
482 pStreamDesc->_signed = 1;
483 pStreamDesc->normalize = input->Normalized;
484 break;
485 case GL_BYTE:
486 pStreamDesc->_signed = 1;
487 pStreamDesc->normalize = input->Normalized;
488 break;
489 case GL_UNSIGNED_SHORT:
490 pStreamDesc->_signed = 0;
491 pStreamDesc->normalize = input->Normalized;
492 break;
493 case GL_UNSIGNED_BYTE:
494 pStreamDesc->_signed = 0;
495 pStreamDesc->normalize = input->Normalized;
496 break;
497 default:
498 case GL_INT:
499 case GL_UNSIGNED_INT:
500 case GL_DOUBLE:
501 assert(0);
502 break;
503 }
504 context->nNumActiveAos++;
505 }
506
507 void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
508 {
509 context_t *context = R700_CONTEXT(ctx);
510 struct r700_vertex_program *vpc
511 = (struct r700_vertex_program *)ctx->VertexProgram._Current;
512
513 struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program);
514 unsigned int unLoc = 0;
515 unsigned int unBit = mesa_vp->Base.InputsRead;
516 context->nNumActiveAos = 0;
517
518 while(unBit)
519 {
520 if(unBit & 1)
521 {
522 r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]);
523 }
524
525 unBit >>= 1;
526 ++unLoc;
527 }
528 context->radeon.tcl.aos_count = context->nNumActiveAos;
529 }
530
531 void * r700GetActiveVpShaderBo(GLcontext * ctx)
532 {
533 context_t *context = R700_CONTEXT(ctx);
534 struct r700_vertex_program *vp = context->selected_vp;;
535
536 if (vp)
537 return vp->shaderbo;
538 else
539 return NULL;
540 }
541
542 GLboolean r700SetupVertexProgram(GLcontext * ctx)
543 {
544 context_t *context = R700_CONTEXT(ctx);
545 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
546 struct r700_vertex_program *vp = context->selected_vp;
547
548 struct gl_program_parameter_list *paramList;
549 unsigned int unNumParamData;
550 unsigned int ui;
551
552 if(GL_FALSE == vp->loaded)
553 {
554 if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
555 {
556 Assemble( &(vp->r700Shader) );
557 }
558
559 /* Load vp to gpu */
560 r600EmitShader(ctx,
561 &(vp->shaderbo),
562 (GLvoid *)(vp->r700Shader.pProgram),
563 vp->r700Shader.uShaderBinaryDWORDSize,
564 "VS");
565
566 vp->loaded = GL_TRUE;
567 }
568
569 DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
570 vp->r700Shader.uShaderBinaryDWORDSize);
571
572 /* TODO : enable this after MemUse fixed *=
573 (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
574 */
575
576 R600_STATECHANGE(context, vs);
577 R600_STATECHANGE(context, fs); /* hack */
578
579 r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
580 SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
581
582 r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */
583
584 SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
585 NUM_GPRS_shift, NUM_GPRS_mask);
586
587 if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
588 {
589 SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
590 STACK_SIZE_shift, STACK_SIZE_mask);
591 }
592
593 R600_STATECHANGE(context, spi);
594
595 SETfield(r700->SPI_VS_OUT_CONFIG.u32All,
596 vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
597 VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
598 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
599 NUM_INTERP_shift, NUM_INTERP_mask);
600
601 /*
602 SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
603 CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
604 */
605
606 /* sent out shader constants. */
607 paramList = vp->mesa_program->Base.Parameters;
608
609 if(NULL != paramList) {
610 _mesa_load_state_parameters(ctx, paramList);
611
612 if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
613 return GL_FALSE;
614
615 R600_STATECHANGE(context, vs_consts);
616
617 r700->vs.num_consts = paramList->NumParameters;
618
619 unNumParamData = paramList->NumParameters;
620
621 for(ui=0; ui<unNumParamData; ui++) {
622 r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
623 r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
624 r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
625 r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
626 }
627 } else
628 r700->vs.num_consts = 0;
629
630 return GL_TRUE;
631 }