r600 : Initial version of glsl fc.
[mesa.git] / src / mesa / drivers / dri / r600 / r700_vertprog.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "main/mtypes.h"
36
37 #include "tnl/t_context.h"
38 #include "shader/program.h"
39 #include "shader/prog_parameter.h"
40 #include "shader/prog_statevars.h"
41
42 #include "radeon_debug.h"
43 #include "r600_context.h"
44 #include "r600_cmdbuf.h"
45 #include "shader/programopt.c"
46
47 #include "r700_debug.h"
48 #include "r700_vertprog.h"
49
50 unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
51 struct gl_vertex_program *mesa_vp,
52 unsigned int unStart)
53 {
54 unsigned int i;
55 unsigned int unBit;
56 unsigned int unTotal = unStart;
57
58 //!!!!!!! THE ORDER MATCH FS INPUT
59
60 unBit = 1 << VERT_RESULT_HPOS;
61 if(mesa_vp->Base.OutputsWritten & unBit)
62 {
63 pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++;
64 }
65
66 unBit = 1 << VERT_RESULT_COL0;
67 if(mesa_vp->Base.OutputsWritten & unBit)
68 {
69 pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
70 }
71
72 unBit = 1 << VERT_RESULT_COL1;
73 if(mesa_vp->Base.OutputsWritten & unBit)
74 {
75 pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
76 }
77
78 //TODO : dealing back face.
79 unBit = 1 << VERT_RESULT_BFC0;
80 if(mesa_vp->Base.OutputsWritten & unBit)
81 {
82 pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++;
83 }
84
85 unBit = 1 << VERT_RESULT_BFC1;
86 if(mesa_vp->Base.OutputsWritten & unBit)
87 {
88 pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++;
89 }
90
91 //TODO : dealing fog.
92 unBit = 1 << VERT_RESULT_FOGC;
93 if(mesa_vp->Base.OutputsWritten & unBit)
94 {
95 pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
96 }
97
98 //TODO : dealing point size.
99 unBit = 1 << VERT_RESULT_PSIZ;
100 if(mesa_vp->Base.OutputsWritten & unBit)
101 {
102 pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
103 }
104
105 for(i=0; i<8; i++)
106 {
107 unBit = 1 << (VERT_RESULT_TEX0 + i);
108 if(mesa_vp->Base.OutputsWritten & unBit)
109 {
110 pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++;
111 }
112 }
113
114 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
115 {
116 unBit = 1 << i;
117 if(mesa_vp->Base.OutputsWritten & unBit)
118 {
119 pAsm->ucVP_OutputMap[i] = unTotal++;
120 }
121 }
122
123 return (unTotal - unStart);
124 }
125
126 unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm,
127 struct gl_vertex_program *mesa_vp,
128 unsigned int unStart)
129 {
130 int i;
131 unsigned int unBit;
132 unsigned int unTotal = unStart;
133 for(i=0; i<VERT_ATTRIB_MAX; i++)
134 {
135 unBit = 1 << i;
136 if(mesa_vp->Base.InputsRead & unBit)
137 {
138 pAsm->ucVP_AttributeMap[i] = unTotal++;
139 }
140 }
141 return (unTotal - unStart);
142 }
143
144 GLboolean Process_Vertex_Program_Vfetch_Instructions(
145 struct r700_vertex_program *vp,
146 struct gl_vertex_program *mesa_vp)
147 {
148 int i;
149 unsigned int unBit;
150 VTX_FETCH_METHOD vtxFetchMethod;
151 vtxFetchMethod.bEnableMini = GL_FALSE;
152 vtxFetchMethod.mega_fetch_remainder = 0;
153
154 for(i=0; i<VERT_ATTRIB_MAX; i++)
155 {
156 unBit = 1 << i;
157 if(mesa_vp->Base.InputsRead & unBit)
158 {
159 assemble_vfetch_instruction(&vp->r700AsmCode,
160 i,
161 vp->r700AsmCode.ucVP_AttributeMap[i],
162 vp->aos_desc[i].size,
163 vp->aos_desc[i].type,
164 &vtxFetchMethod);
165 }
166 }
167
168 return GL_TRUE;
169 }
170
171 GLboolean Process_Vertex_Program_Vfetch_Instructions2(
172 GLcontext *ctx,
173 struct r700_vertex_program *vp,
174 struct gl_vertex_program *mesa_vp)
175 {
176 int i;
177 context_t *context = R700_CONTEXT(ctx);
178
179 VTX_FETCH_METHOD vtxFetchMethod;
180 vtxFetchMethod.bEnableMini = GL_FALSE;
181 vtxFetchMethod.mega_fetch_remainder = 0;
182
183 for(i=0; i<context->nNumActiveAos; i++)
184 {
185 assemble_vfetch_instruction2(&vp->r700AsmCode,
186 vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element],
187 context->stream_desc[i].type,
188 context->stream_desc[i].size,
189 context->stream_desc[i].element,
190 context->stream_desc[i]._signed,
191 context->stream_desc[i].normalize,
192 &vtxFetchMethod);
193 }
194
195 return GL_TRUE;
196 }
197
198 void Map_Vertex_Program(GLcontext *ctx,
199 struct r700_vertex_program *vp,
200 struct gl_vertex_program *mesa_vp)
201 {
202 GLuint ui;
203 r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
204 unsigned int num_inputs;
205
206 // R0 will always be used for index into vertex buffer
207 pAsm->number_used_registers = 1;
208 pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
209
210 // Map Inputs: Add 1 to mapping since R0 is used for index
211 num_inputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
212 pAsm->number_used_registers += num_inputs;
213
214 // Create VFETCH instructions for inputs
215 if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
216 {
217 radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
218 return;
219 }
220
221 // Map Outputs
222 pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
223
224 pAsm->starting_export_register_number = pAsm->number_used_registers;
225
226 pAsm->number_used_registers += pAsm->number_of_exports;
227
228 pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
229
230 for(ui=0; ui<pAsm->number_of_exports; ui++)
231 {
232 pAsm->pucOutMask[ui] = 0x0;
233 }
234
235 /* Map temporary registers (GPRs) */
236 pAsm->starting_temp_register_number = pAsm->number_used_registers;
237
238 if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
239 { /* arb uses NumNativeTemporaries */
240 pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
241 }
242 else
243 { /* fix func t_vp uses NumTemporaries */
244 pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
245 }
246
247 pAsm->flag_reg_index = pAsm->number_used_registers++;
248
249 pAsm->uFirstHelpReg = pAsm->number_used_registers;
250 }
251
252 GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
253 struct gl_vertex_program *mesa_vp)
254 {
255 GLuint i, j;
256 GLint * puiTEMPwrites;
257 struct prog_instruction *pILInst;
258 InstDeps *pInstDeps;
259
260 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries);
261 for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
262 {
263 puiTEMPwrites[i] = -1;
264 }
265
266 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
267
268 for(i=0; i<mesa_vp->Base.NumInstructions; i++)
269 {
270 pInstDeps[i].nDstDep = -1;
271 pILInst = &(mesa_vp->Base.Instructions[i]);
272
273 //Dst
274 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
275 {
276 //Set lastwrite for the temp
277 puiTEMPwrites[pILInst->DstReg.Index] = i;
278 }
279
280 //Src
281 for(j=0; j<3; j++)
282 {
283 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
284 {
285 //Set dep.
286 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
287 }
288 else
289 {
290 pInstDeps[i].nSrcDeps[j] = -1;
291 }
292 }
293 }
294
295 vp->r700AsmCode.pInstDeps = pInstDeps;
296
297 FREE(puiTEMPwrites);
298
299 return GL_TRUE;
300 }
301
302 struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
303 struct gl_vertex_program *mesa_vp)
304 {
305 context_t *context = R700_CONTEXT(ctx);
306 struct r700_vertex_program *vp;
307 unsigned int i;
308
309 vp = _mesa_calloc(sizeof(*vp));
310 vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base);
311
312 if (mesa_vp->IsPositionInvariant)
313 {
314 _mesa_insert_mvp_code(ctx, vp->mesa_program);
315 }
316
317 for(i=0; i<context->nNumActiveAos; i++)
318 {
319 vp->aos_desc[i].size = context->stream_desc[i].size;
320 vp->aos_desc[i].stride = context->stream_desc[i].stride;
321 vp->aos_desc[i].type = context->stream_desc[i].type;
322 }
323
324 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
325 {
326 vp->r700AsmCode.bR6xx = 1;
327 }
328
329 //Init_Program
330 Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
331 Map_Vertex_Program(ctx, vp, vp->mesa_program );
332
333 if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
334 {
335 return NULL;
336 }
337
338 InitShaderProgram(&(vp->r700AsmCode));
339
340 if(GL_FALSE == AssembleInstr(0,
341 vp->mesa_program->Base.NumInstructions,
342 &(vp->mesa_program->Base.Instructions[0]),
343 &(vp->r700AsmCode)) )
344 {
345 return NULL;
346 }
347
348 if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
349 {
350 return NULL;
351 }
352
353 if( GL_FALSE == RelocProgram(&(vp->r700AsmCode)) )
354 {
355 return GL_FALSE;
356 }
357
358 vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
359 : (vp->r700AsmCode.number_used_registers - 1);
360
361 vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;
362
363 vp->translated = GL_TRUE;
364
365 return vp;
366 }
367
368 void r700SelectVertexShader(GLcontext *ctx)
369 {
370 context_t *context = R700_CONTEXT(ctx);
371 struct r700_vertex_program_cont *vpc;
372 struct r700_vertex_program *vp;
373 unsigned int i;
374 GLboolean match;
375 GLbitfield InputsRead;
376
377 vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
378
379 InputsRead = vpc->mesa_program.Base.InputsRead;
380 if (vpc->mesa_program.IsPositionInvariant)
381 {
382 InputsRead |= VERT_BIT_POS;
383 }
384
385 for (vp = vpc->progs; vp; vp = vp->next)
386 {
387 match = GL_TRUE;
388 for(i=0; i<context->nNumActiveAos; i++)
389 {
390 if (vp->aos_desc[i].size != context->stream_desc[i].size)
391 {
392 match = GL_FALSE;
393 break;
394 }
395 }
396 if (match)
397 {
398 context->selected_vp = vp;
399 return;
400 }
401 }
402
403 vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program));
404 if(!vp)
405 {
406 radeon_error("Failed to translate vertex shader. \n");
407 return;
408 }
409 vp->next = vpc->progs;
410 vpc->progs = vp;
411 context->selected_vp = vp;
412 return;
413 }
414
415 int getTypeSize(GLenum type)
416 {
417 switch (type)
418 {
419 case GL_DOUBLE:
420 return sizeof(GLdouble);
421 case GL_FLOAT:
422 return sizeof(GLfloat);
423 case GL_INT:
424 return sizeof(GLint);
425 case GL_UNSIGNED_INT:
426 return sizeof(GLuint);
427 case GL_SHORT:
428 return sizeof(GLshort);
429 case GL_UNSIGNED_SHORT:
430 return sizeof(GLushort);
431 case GL_BYTE:
432 return sizeof(GLbyte);
433 case GL_UNSIGNED_BYTE:
434 return sizeof(GLubyte);
435 default:
436 assert(0);
437 return 0;
438 }
439 }
440
441 static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input)
442 {
443 context_t *context = R700_CONTEXT(ctx);
444
445 StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]);
446
447 GLuint stride;
448
449 stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size
450 : input->StrideB;
451
452 if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
453 #if MESA_BIG_ENDIAN
454 getTypeSize(input->Type) != 4 ||
455 #endif
456 stride < 4)
457 {
458 pStreamDesc->type = GL_FLOAT;
459
460 if (input->StrideB == 0)
461 {
462 pStreamDesc->stride = 0;
463 }
464 else
465 {
466 pStreamDesc->stride = sizeof(GLfloat) * input->Size;
467 }
468 pStreamDesc->dwords = input->Size;
469 pStreamDesc->is_named_bo = GL_FALSE;
470 }
471 else
472 {
473 pStreamDesc->type = input->Type;
474 pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4;
475 if (!input->BufferObj->Name)
476 {
477 if (input->StrideB == 0)
478 {
479 pStreamDesc->stride = 0;
480 }
481 else
482 {
483 pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3;
484 }
485
486 pStreamDesc->is_named_bo = GL_FALSE;
487 }
488 }
489
490 pStreamDesc->size = input->Size;
491 pStreamDesc->dst_loc = context->nNumActiveAos;
492 pStreamDesc->element = unLoc;
493
494 switch (pStreamDesc->type)
495 { //GetSurfaceFormat
496 case GL_FLOAT:
497 pStreamDesc->_signed = 0;
498 pStreamDesc->normalize = GL_FALSE;
499 break;
500 case GL_SHORT:
501 pStreamDesc->_signed = 1;
502 pStreamDesc->normalize = input->Normalized;
503 break;
504 case GL_BYTE:
505 pStreamDesc->_signed = 1;
506 pStreamDesc->normalize = input->Normalized;
507 break;
508 case GL_UNSIGNED_SHORT:
509 pStreamDesc->_signed = 0;
510 pStreamDesc->normalize = input->Normalized;
511 break;
512 case GL_UNSIGNED_BYTE:
513 pStreamDesc->_signed = 0;
514 pStreamDesc->normalize = input->Normalized;
515 break;
516 default:
517 case GL_INT:
518 case GL_UNSIGNED_INT:
519 case GL_DOUBLE:
520 assert(0);
521 break;
522 }
523 context->nNumActiveAos++;
524 }
525
526 void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
527 {
528 context_t *context = R700_CONTEXT(ctx);
529 struct r700_vertex_program *vpc
530 = (struct r700_vertex_program *)ctx->VertexProgram._Current;
531
532 struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program);
533 unsigned int unLoc = 0;
534 unsigned int unBit = mesa_vp->Base.InputsRead;
535 context->nNumActiveAos = 0;
536
537 if (mesa_vp->IsPositionInvariant)
538 {
539 unBit |= VERT_BIT_POS;
540 }
541
542 while(unBit)
543 {
544 if(unBit & 1)
545 {
546 r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]);
547 }
548
549 unBit >>= 1;
550 ++unLoc;
551 }
552 context->radeon.tcl.aos_count = context->nNumActiveAos;
553 }
554
555 void * r700GetActiveVpShaderBo(GLcontext * ctx)
556 {
557 context_t *context = R700_CONTEXT(ctx);
558 struct r700_vertex_program *vp = context->selected_vp;;
559
560 if (vp)
561 return vp->shaderbo;
562 else
563 return NULL;
564 }
565
566 GLboolean r700SetupVertexProgram(GLcontext * ctx)
567 {
568 context_t *context = R700_CONTEXT(ctx);
569 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
570 struct r700_vertex_program *vp = context->selected_vp;
571
572 struct gl_program_parameter_list *paramList;
573 unsigned int unNumParamData;
574 unsigned int ui;
575
576 if(GL_FALSE == vp->loaded)
577 {
578 if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
579 {
580 Assemble( &(vp->r700Shader) );
581 }
582
583 /* Load vp to gpu */
584 r600EmitShader(ctx,
585 &(vp->shaderbo),
586 (GLvoid *)(vp->r700Shader.pProgram),
587 vp->r700Shader.uShaderBinaryDWORDSize,
588 "VS");
589
590 vp->loaded = GL_TRUE;
591 }
592
593 DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
594 vp->r700Shader.uShaderBinaryDWORDSize);
595
596 /* TODO : enable this after MemUse fixed *=
597 (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
598 */
599
600 R600_STATECHANGE(context, vs);
601 R600_STATECHANGE(context, fs); /* hack */
602
603 r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
604 SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
605
606 r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */
607
608 SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
609 NUM_GPRS_shift, NUM_GPRS_mask);
610
611 if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
612 {
613 SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
614 STACK_SIZE_shift, STACK_SIZE_mask);
615 }
616
617 R600_STATECHANGE(context, spi);
618
619 SETfield(r700->SPI_VS_OUT_CONFIG.u32All,
620 vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
621 VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
622 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
623 NUM_INTERP_shift, NUM_INTERP_mask);
624
625 /*
626 SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
627 CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
628 */
629
630 /* sent out shader constants. */
631 paramList = vp->mesa_program->Base.Parameters;
632
633 if(NULL != paramList) {
634 _mesa_load_state_parameters(ctx, paramList);
635
636 if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
637 return GL_FALSE;
638
639 R600_STATECHANGE(context, vs_consts);
640
641 r700->vs.num_consts = paramList->NumParameters;
642
643 unNumParamData = paramList->NumParameters;
644
645 for(ui=0; ui<unNumParamData; ui++) {
646 r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
647 r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
648 r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
649 r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
650 }
651 } else
652 r700->vs.num_consts = 0;
653
654 return GL_TRUE;
655 }