2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
34 #include "main/imports.h"
35 #include "main/mtypes.h"
37 #include "tnl/t_context.h"
38 #include "shader/program.h"
39 #include "shader/prog_parameter.h"
40 #include "shader/prog_statevars.h"
42 #include "radeon_debug.h"
43 #include "r600_context.h"
44 #include "r600_cmdbuf.h"
45 #include "shader/programopt.h"
47 #include "r700_debug.h"
48 #include "r700_vertprog.h"
/*
 * Map_Vertex_Output: assign consecutive slot numbers to the vertex results
 * that a vertex program writes.
 *
 * For every vertex result whose bit is set in mesa_vp->Base.OutputsWritten,
 * the next free slot number is stored in pAsm->ucVP_OutputMap[result].
 * Numbering starts at unStart and grows by one per mapped result, visiting
 * results in this fixed order: HPOS, COL0, COL1, BFC0, BFC1, FOGC, PSIZ,
 * TEX0 + i, then VERT_RESULT_VAR0 .. VERT_RESULT_MAX - 1.
 *
 * Returns the number of results that were mapped (unTotal - unStart).
 *
 * NOTE(review): this listing has lost source lines (the embedded original
 * line numbers skip): braces, the declarations of i / unBit / unStart, the
 * loop header around the TEX0 + i entry and the unBit assignment inside the
 * VAR0 loop are not visible here. Confirm against the complete file.
 */
50 unsigned int Map_Vertex_Output(r700_AssemblerBase
*pAsm
,
51 struct gl_vertex_program
*mesa_vp
,
56 unsigned int unTotal
= unStart
;
58 //!!!!!!! THE ORDER MATCH FS INPUT
/* Clip-space position. */
60 unBit
= 1 << VERT_RESULT_HPOS
;
61 if(mesa_vp
->Base
.OutputsWritten
& unBit
)
63 pAsm
->ucVP_OutputMap
[VERT_RESULT_HPOS
] = unTotal
++;
/* Front-face primary and secondary colors. */
66 unBit
= 1 << VERT_RESULT_COL0
;
67 if(mesa_vp
->Base
.OutputsWritten
& unBit
)
69 pAsm
->ucVP_OutputMap
[VERT_RESULT_COL0
] = unTotal
++;
72 unBit
= 1 << VERT_RESULT_COL1
;
73 if(mesa_vp
->Base
.OutputsWritten
& unBit
)
75 pAsm
->ucVP_OutputMap
[VERT_RESULT_COL1
] = unTotal
++;
78 //TODO : dealing back face.
79 unBit
= 1 << VERT_RESULT_BFC0
;
80 if(mesa_vp
->Base
.OutputsWritten
& unBit
)
82 pAsm
->ucVP_OutputMap
[VERT_RESULT_BFC0
] = unTotal
++;
85 unBit
= 1 << VERT_RESULT_BFC1
;
86 if(mesa_vp
->Base
.OutputsWritten
& unBit
)
88 pAsm
->ucVP_OutputMap
[VERT_RESULT_BFC1
] = unTotal
++;
/* Fog coordinate. */
92 unBit
= 1 << VERT_RESULT_FOGC
;
93 if(mesa_vp
->Base
.OutputsWritten
& unBit
)
95 pAsm
->ucVP_OutputMap
[VERT_RESULT_FOGC
] = unTotal
++;
98 //TODO : dealing point size.
99 unBit
= 1 << VERT_RESULT_PSIZ
;
100 if(mesa_vp
->Base
.OutputsWritten
& unBit
)
102 pAsm
->ucVP_OutputMap
[VERT_RESULT_PSIZ
] = unTotal
++;
/* Texture coordinate results, indexed by i.
 * NOTE(review): the loop that drives i is not visible in this listing. */
107 unBit
= 1 << (VERT_RESULT_TEX0
+ i
);
108 if(mesa_vp
->Base
.OutputsWritten
& unBit
)
110 pAsm
->ucVP_OutputMap
[VERT_RESULT_TEX0
+ i
] = unTotal
++;
/* Remaining results from VERT_RESULT_VAR0 up to VERT_RESULT_MAX.
 * NOTE(review): the statement that sets unBit for each i is not visible. */
114 for(i
=VERT_RESULT_VAR0
; i
<VERT_RESULT_MAX
; i
++)
117 if(mesa_vp
->Base
.OutputsWritten
& unBit
)
119 pAsm
->ucVP_OutputMap
[i
] = unTotal
++;
/* Number of slots handed out. */
123 return (unTotal
- unStart
);
/*
 * Map_Vertex_Input: assign consecutive slot numbers to the vertex attributes
 * that a vertex program reads.
 *
 * Walks attribute indices 0 .. VERT_ATTRIB_MAX - 1. For each index whose test
 * against mesa_vp->Base.InputsRead succeeds, the next free slot number
 * (starting at unStart) is stored in pAsm->ucVP_AttributeMap[i].
 *
 * Returns the number of attributes that were mapped (unTotal - unStart).
 *
 * NOTE(review): the declarations of i / unBit and the statement that sets
 * unBit inside the loop are not visible in this listing; presumably
 * unBit = 1 << i, as in Map_Vertex_Output -- confirm against the full file.
 */
126 unsigned int Map_Vertex_Input(r700_AssemblerBase
*pAsm
,
127 struct gl_vertex_program
*mesa_vp
,
128 unsigned int unStart
)
132 unsigned int unTotal
= unStart
;
133 for(i
=0; i
<VERT_ATTRIB_MAX
; i
++)
136 if(mesa_vp
->Base
.InputsRead
& unBit
)
138 pAsm
->ucVP_AttributeMap
[i
] = unTotal
++;
/* Number of slots handed out. */
141 return (unTotal
- unStart
);
/*
 * Process_Vertex_Program_Vfetch_Instructions: issue one
 * assemble_vfetch_instruction() call per vertex attribute read by mesa_vp.
 *
 * Walks attribute indices 0 .. VERT_ATTRIB_MAX - 1 and, for each index whose
 * test against mesa_vp->Base.InputsRead succeeds, calls
 * assemble_vfetch_instruction() with the assembler state (&vp->r700AsmCode),
 * the slot recorded in ucVP_AttributeMap[i], and the size and type stored in
 * vp->aos_desc[i].
 *
 * A local VTX_FETCH_METHOD is initialised with bEnableMini = GL_FALSE and
 * mega_fetch_remainder = 0 before the loop.
 *
 * NOTE(review): some call arguments, the unBit assignment, the handling of
 * the callee's result and this function's return statement are not visible
 * in this listing.
 */
144 GLboolean
Process_Vertex_Program_Vfetch_Instructions(
145 struct r700_vertex_program
*vp
,
146 struct gl_vertex_program
*mesa_vp
)
150 VTX_FETCH_METHOD vtxFetchMethod
;
151 vtxFetchMethod
.bEnableMini
= GL_FALSE
;
152 vtxFetchMethod
.mega_fetch_remainder
= 0;
154 for(i
=0; i
<VERT_ATTRIB_MAX
; i
++)
157 if(mesa_vp
->Base
.InputsRead
& unBit
)
159 assemble_vfetch_instruction(&vp
->r700AsmCode
,
161 vp
->r700AsmCode
.ucVP_AttributeMap
[i
],
162 vp
->aos_desc
[i
].size
,
163 vp
->aos_desc
[i
].type
,
/*
 * Process_Vertex_Program_Vfetch_Instructions2: issue one
 * assemble_vfetch_instruction2() call per active vertex stream.
 *
 * Iterates i over 0 .. context->nNumActiveAos - 1 and, for each entry of
 * context->stream_desc[], calls assemble_vfetch_instruction2() with the
 * assembler state (&vp->r700AsmCode), the slot that ucVP_AttributeMap holds
 * for stream_desc[i].element, and the stream's type, size, element, _signed,
 * normalize and format fields.
 *
 * A local VTX_FETCH_METHOD is initialised with bEnableMini = GL_FALSE and
 * mega_fetch_remainder = 0 before the loop.
 *
 * NOTE(review): the first parameter (ctx, used by R700_CONTEXT below), the
 * trailing call arguments and the return statement are not visible in this
 * listing.
 */
171 GLboolean
Process_Vertex_Program_Vfetch_Instructions2(
173 struct r700_vertex_program
*vp
,
174 struct gl_vertex_program
*mesa_vp
)
177 context_t
*context
= R700_CONTEXT(ctx
);
179 VTX_FETCH_METHOD vtxFetchMethod
;
180 vtxFetchMethod
.bEnableMini
= GL_FALSE
;
181 vtxFetchMethod
.mega_fetch_remainder
= 0;
183 for(i
=0; i
<context
->nNumActiveAos
; i
++)
185 assemble_vfetch_instruction2(&vp
->r700AsmCode
,
186 vp
->r700AsmCode
.ucVP_AttributeMap
[context
->stream_desc
[i
].element
],
187 context
->stream_desc
[i
].type
,
188 context
->stream_desc
[i
].size
,
189 context
->stream_desc
[i
].element
,
190 context
->stream_desc
[i
]._signed
,
191 context
->stream_desc
[i
].normalize
,
192 context
->stream_desc
[i
].format
,
/*
 * Map_Vertex_Program: lay out the register file used by a vertex program.
 *
 * Working on vp->r700AsmCode, the visible steps are, in order:
 *   1. number_used_registers is set to 1 (R0 is reserved as the vertex
 *      buffer index) and recorded as starting_vfetch_register_number.
 *   2. Map_Vertex_Input() maps the inputs starting at that register; the
 *      count it returns is added to number_used_registers.
 *   3. Process_Vertex_Program_Vfetch_Instructions2() emits the fetches;
 *      a result other than GL_TRUE is reported through radeon_error().
 *   4. Map_Vertex_Output() maps the exports; its result is stored in
 *      number_of_exports, the current register count becomes
 *      starting_export_register_number, and the count is advanced.
 *   5. pucOutMask is allocated with one byte per export and zeroed.
 *   6. starting_temp_register_number is recorded and the larger of
 *      NumNativeTemporaries / NumTemporaries is added to the count.
 *   7. One register is taken for flag_reg_index, and the register after it
 *      is recorded as uFirstHelpReg.
 *
 * NOTE(review): no NULL check on the MALLOC result is visible before the
 * zeroing loop writes through pucOutMask. Braces, the declaration of ui and
 * whatever follows the radeon_error() call are not visible in this listing.
 */
199 void Map_Vertex_Program(GLcontext
*ctx
,
200 struct r700_vertex_program
*vp
,
201 struct gl_vertex_program
*mesa_vp
)
204 r700_AssemblerBase
*pAsm
= &(vp
->r700AsmCode
);
205 unsigned int num_inputs
;
207 // R0 will always be used for index into vertex buffer
208 pAsm
->number_used_registers
= 1;
209 pAsm
->starting_vfetch_register_number
= pAsm
->number_used_registers
;
211 // Map Inputs: Add 1 to mapping since R0 is used for index
212 num_inputs
= Map_Vertex_Input(pAsm
, mesa_vp
, pAsm
->number_used_registers
);
213 pAsm
->number_used_registers
+= num_inputs
;
215 // Create VFETCH instructions for inputs
216 if (GL_TRUE
!= Process_Vertex_Program_Vfetch_Instructions2(ctx
, vp
, mesa_vp
) )
218 radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
/* Exports are numbered from the first register after the inputs. */
223 pAsm
->number_of_exports
= Map_Vertex_Output(pAsm
, mesa_vp
, pAsm
->number_used_registers
);
225 pAsm
->starting_export_register_number
= pAsm
->number_used_registers
;
227 pAsm
->number_used_registers
+= pAsm
->number_of_exports
;
/* One mask byte per export, cleared to zero. */
229 pAsm
->pucOutMask
= (unsigned char*) MALLOC(pAsm
->number_of_exports
);
231 for(ui
=0; ui
<pAsm
->number_of_exports
; ui
++)
233 pAsm
->pucOutMask
[ui
] = 0x0;
236 /* Map temporary registers (GPRs) */
237 pAsm
->starting_temp_register_number
= pAsm
->number_used_registers
;
239 if(mesa_vp
->Base
.NumNativeTemporaries
>= mesa_vp
->Base
.NumTemporaries
)
240 { /* arb uses NumNativeTemporaries */
241 pAsm
->number_used_registers
+= mesa_vp
->Base
.NumNativeTemporaries
;
244 { /* fix func t_vp uses NumTemporaries */
245 pAsm
->number_used_registers
+= mesa_vp
->Base
.NumTemporaries
;
/* Post-increment: the flag register takes the current index, helpers follow. */
248 pAsm
->flag_reg_index
= pAsm
->number_used_registers
++;
250 pAsm
->uFirstHelpReg
= pAsm
->number_used_registers
;
/*
 * Find_Instruction_Dependencies_vp: build a per-instruction dependency table
 * for the temporaries used by a vertex program.
 *
 * Allocates an InstDeps array with one entry per instruction of
 * mesa_vp->Base and a scratch array puiTEMPwrites with one entry per
 * temporary, initialised to -1. For each instruction i:
 *   - nDstDep is set to -1;
 *   - if the destination register is a PROGRAM_TEMPORARY, i is recorded in
 *     puiTEMPwrites[DstReg.Index];
 *   - for source j, nSrcDeps[j] is the recorded value for that temporary
 *     when the source is a PROGRAM_TEMPORARY, and -1 otherwise.
 * The finished table is stored in vp->r700AsmCode.pInstDeps.
 *
 * NOTE(review):
 *   - The scratch array is sized with sizeof(GLuint) but holds GLint.
 *   - puiTEMPwrites is written on the line right after its MALLOC, so its
 *     allocation is not checked for NULL; no check on pInstDeps is visible.
 *   - In the visible order the destination write is recorded before the
 *     sources are looked up, so an instruction that reads and writes the same
 *     temporary would get its own index as the source dependency -- confirm
 *     this is intended.
 *   - The loop over j, the release of puiTEMPwrites and the return statement
 *     are not visible in this listing.
 */
253 GLboolean
Find_Instruction_Dependencies_vp(struct r700_vertex_program
*vp
,
254 struct gl_vertex_program
*mesa_vp
)
257 GLint
* puiTEMPwrites
;
258 struct prog_instruction
*pILInst
;
/* -1 marks a temporary that no instruction has written yet. */
261 puiTEMPwrites
= (GLint
*) MALLOC(sizeof(GLuint
)*mesa_vp
->Base
.NumTemporaries
);
262 for(i
=0; i
<mesa_vp
->Base
.NumTemporaries
; i
++)
264 puiTEMPwrites
[i
] = -1;
267 pInstDeps
= (InstDeps
*)MALLOC(sizeof(InstDeps
)*mesa_vp
->Base
.NumInstructions
);
269 for(i
=0; i
<mesa_vp
->Base
.NumInstructions
; i
++)
271 pInstDeps
[i
].nDstDep
= -1;
272 pILInst
= &(mesa_vp
->Base
.Instructions
[i
]);
275 if(pILInst
->DstReg
.File
== PROGRAM_TEMPORARY
)
277 //Set lastwrite for the temp
278 puiTEMPwrites
[pILInst
->DstReg
.Index
] = i
;
/* Source operand j: depend on the recorded writer of that temporary, if any. */
284 if(pILInst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
)
287 pInstDeps
[i
].nSrcDeps
[j
] = puiTEMPwrites
[pILInst
->SrcReg
[j
].Index
];
291 pInstDeps
[i
].nSrcDeps
[j
] = -1;
/* Hand the table to the assembler state. */
296 vp
->r700AsmCode
.pInstDeps
= pInstDeps
;
/*
 * r700TranslateVertexShader: create a translated r700 vertex program from a
 * Mesa vertex program.
 *
 * Visible steps, in order:
 *   1. A zeroed struct r700_vertex_program is allocated with calloc().
 *   2. mesa_vp is cloned with _mesa_clone_vertex_program() into
 *      vp->mesa_program; when mesa_vp->IsPositionInvariant is set,
 *      _mesa_insert_mvp_code() is applied to the clone.
 *   3. size, stride, type and format of each active stream
 *      (context->stream_desc[0 .. nNumActiveAos - 1]) are copied into
 *      vp->aos_desc[].
 *   4. r700AsmCode.bR6xx is set to 1 for chip families below
 *      CHIP_FAMILY_RV770.
 *   5. The assembler is initialised (Init_r700_AssemblerBase with SPT_VP),
 *      registers are laid out (Map_Vertex_Program) and dependencies computed
 *      (Find_Instruction_Dependencies_vp).
 *   6. InitShaderProgram() is called, SamplerUnits are copied from the clone,
 *      and unCurNumILInsts is set to the clone's NumInstructions.
 *   7. AssembleInstr(), Process_Vertex_Exports() and RelocProgram() are run;
 *      each result is compared against GL_FALSE.
 *   8. r700Shader.nRegs is set to number_used_registers - 1 (or 0 when no
 *      registers are used), nParamExports to number_of_exports, and
 *      vp->translated to GL_TRUE.
 *
 * NOTE(review): vp is dereferenced on the line right after calloc(), so the
 * allocation is not checked for NULL. The bodies of the GL_FALSE failure
 * branches, one AssembleInstr() argument and the final return are not
 * visible in this listing.
 */
303 struct r700_vertex_program
* r700TranslateVertexShader(GLcontext
*ctx
,
304 struct gl_vertex_program
*mesa_vp
)
306 context_t
*context
= R700_CONTEXT(ctx
);
307 struct r700_vertex_program
*vp
;
310 vp
= calloc(1, sizeof(*vp
));
311 vp
->mesa_program
= _mesa_clone_vertex_program(ctx
, mesa_vp
);
313 if (mesa_vp
->IsPositionInvariant
)
315 _mesa_insert_mvp_code(ctx
, vp
->mesa_program
);
/* Snapshot the stream layout this translation is built for. */
318 for(i
=0; i
<context
->nNumActiveAos
; i
++)
320 vp
->aos_desc
[i
].size
= context
->stream_desc
[i
].size
;
321 vp
->aos_desc
[i
].stride
= context
->stream_desc
[i
].stride
;
322 vp
->aos_desc
[i
].type
= context
->stream_desc
[i
].type
;
323 vp
->aos_desc
[i
].format
= context
->stream_desc
[i
].format
;
326 if (context
->radeon
.radeonScreen
->chip_family
< CHIP_FAMILY_RV770
)
328 vp
->r700AsmCode
.bR6xx
= 1;
332 Init_r700_AssemblerBase(SPT_VP
, &(vp
->r700AsmCode
), &(vp
->r700Shader
) );
333 Map_Vertex_Program(ctx
, vp
, vp
->mesa_program
);
335 if(GL_FALSE
== Find_Instruction_Dependencies_vp(vp
, vp
->mesa_program
))
340 InitShaderProgram(&(vp
->r700AsmCode
));
342 for(i
=0; i
< MAX_SAMPLERS
; i
++)
344 vp
->r700AsmCode
.SamplerUnits
[i
] = vp
->mesa_program
->Base
.SamplerUnits
[i
];
347 vp
->r700AsmCode
.unCurNumILInsts
= vp
->mesa_program
->Base
.NumInstructions
;
349 if(GL_FALSE
== AssembleInstr(0,
351 vp
->mesa_program
->Base
.NumInstructions
,
352 &(vp
->mesa_program
->Base
.Instructions
[0]),
353 &(vp
->r700AsmCode
)) )
358 if(GL_FALSE
== Process_Vertex_Exports(&(vp
->r700AsmCode
), vp
->mesa_program
->Base
.OutputsWritten
) )
363 if( GL_FALSE
== RelocProgram(&(vp
->r700AsmCode
), &(vp
->mesa_program
->Base
)) )
/* nRegs is stored as "count - 1", clamped at 0 when no register is used. */
368 vp
->r700Shader
.nRegs
= (vp
->r700AsmCode
.number_used_registers
== 0) ? 0
369 : (vp
->r700AsmCode
.number_used_registers
- 1);
371 vp
->r700Shader
.nParamExports
= vp
->r700AsmCode
.number_of_exports
;
373 vp
->translated
= GL_TRUE
;
/*
 * r700SelectVertexShader: choose the translated vertex program that matches
 * the current vertex stream layout, translating a new one when needed.
 *
 * ctx->VertexProgram._Current is treated as a struct r700_vertex_program_cont
 * whose progs member heads a list linked through ->next. Each list entry is
 * compared against the active streams: for i in 0 .. nNumActiveAos - 1 the
 * entry's aos_desc[i].size and .format are tested against
 * context->stream_desc[i]. The visible code stores a list entry in
 * context->selected_vp, and separately translates the container's
 * mesa_program with r700TranslateVertexShader(), links the result in front of
 * the existing list (vp->next = vpc->progs) and selects it. A failed
 * translation is reported with radeon_error().
 *
 * InputsRead is computed (with VERT_BIT_POS added for position-invariant
 * programs), but no use of it is visible in this listing.
 *
 * NOTE(review): the match bookkeeping, early returns, the guard around the
 * radeon_error() call and any update of vpc->progs are not visible in this
 * listing.
 */
378 void r700SelectVertexShader(GLcontext
*ctx
)
380 context_t
*context
= R700_CONTEXT(ctx
);
381 struct r700_vertex_program_cont
*vpc
;
382 struct r700_vertex_program
*vp
;
385 GLbitfield InputsRead
;
387 vpc
= (struct r700_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
389 InputsRead
= vpc
->mesa_program
.Base
.InputsRead
;
390 if (vpc
->mesa_program
.IsPositionInvariant
)
392 InputsRead
|= VERT_BIT_POS
;
/* Look through the translations already made for this program. */
395 for (vp
= vpc
->progs
; vp
; vp
= vp
->next
)
398 for(i
=0; i
<context
->nNumActiveAos
; i
++)
400 if (vp
->aos_desc
[i
].size
!= context
->stream_desc
[i
].size
||
401 vp
->aos_desc
[i
].format
!= context
->stream_desc
[i
].format
)
409 context
->selected_vp
= vp
;
/* Build a translation for the current stream layout. */
414 vp
= r700TranslateVertexShader(ctx
, &(vpc
->mesa_program
));
417 radeon_error("Failed to translate vertex shader. \n");
420 vp
->next
= vpc
->progs
;
422 context
->selected_vp
= vp
;
/*
 * getTypeSize: return the size in bytes of the C type that corresponds to a
 * GL data-type enum.
 *
 * The visible returns are sizeof(GLdouble), sizeof(GLfloat), sizeof(GLint),
 * sizeof(GLuint), sizeof(GLshort), sizeof(GLushort), sizeof(GLbyte) and
 * sizeof(GLubyte); the visible case labels are GL_UNSIGNED_INT,
 * GL_UNSIGNED_SHORT and GL_UNSIGNED_BYTE.
 *
 * NOTE(review): the switch header, the remaining case labels (presumably
 * GL_DOUBLE, GL_FLOAT, GL_INT, GL_SHORT, GL_BYTE -- confirm) and any default
 * branch are not visible in this listing.
 */
426 int getTypeSize(GLenum type
)
431 return sizeof(GLdouble
);
433 return sizeof(GLfloat
);
435 return sizeof(GLint
);
436 case GL_UNSIGNED_INT
:
437 return sizeof(GLuint
);
439 return sizeof(GLshort
);
440 case GL_UNSIGNED_SHORT
:
441 return sizeof(GLushort
);
443 return sizeof(GLbyte
);
444 case GL_UNSIGNED_BYTE
:
445 return sizeof(GLubyte
);
/*
 * r700TranslateAttrib: describe one client vertex array as the next active
 * stream in the context.
 *
 * Fills context->stream_desc[context->nNumActiveAos] from `input` and then
 * increments nNumActiveAos.
 *
 * Two layouts are visible:
 *   - When the branch condition holds (it tests input->Type against
 *     GL_DOUBLE, GL_UNSIGNED_INT and GL_INT, and getTypeSize() against 4,
 *     plus terms not visible here), the stream is described as GL_FLOAT:
 *     stride is 0 when StrideB is 0 and sizeof(GLfloat) * Size otherwise,
 *     dwords is Size, and is_named_bo is GL_FALSE.
 *   - Otherwise the input type is kept and dwords is the element byte size
 *     rounded up to whole 32-bit words. If input->BufferObj->Name is zero,
 *     stride is 0 when StrideB is 0 and the element byte size rounded up to a
 *     multiple of 4 otherwise, and is_named_bo is GL_FALSE.
 *
 * Afterwards size, dst_loc (the stream's index), element (unLoc) and format
 * are stored, and a switch on the stream type sets _signed and normalize.
 *
 * No use of the count parameter is visible in this listing.
 *
 * NOTE(review): the declaration and the second half of the `stride`
 * expression, part of the branch condition, the else keywords and most case
 * labels of the final switch are not visible in this listing.
 */
452 static void r700TranslateAttrib(GLcontext
*ctx
, GLuint unLoc
, int count
, const struct gl_client_array
*input
)
454 context_t
*context
= R700_CONTEXT(ctx
);
/* Descriptor slot for the stream being added. */
456 StreamDesc
* pStreamDesc
= &(context
->stream_desc
[context
->nNumActiveAos
]);
460 stride
= (input
->StrideB
== 0) ? getTypeSize(input
->Type
) * input
->Size
463 if (input
->Type
== GL_DOUBLE
|| input
->Type
== GL_UNSIGNED_INT
|| input
->Type
== GL_INT
||
465 getTypeSize(input
->Type
) != 4 ||
469 pStreamDesc
->type
= GL_FLOAT
;
471 if (input
->StrideB
== 0)
473 pStreamDesc
->stride
= 0;
477 pStreamDesc
->stride
= sizeof(GLfloat
) * input
->Size
;
479 pStreamDesc
->dwords
= input
->Size
;
480 pStreamDesc
->is_named_bo
= GL_FALSE
;
484 pStreamDesc
->type
= input
->Type
;
/* Byte size of one element, rounded up to whole dwords. */
485 pStreamDesc
->dwords
= (getTypeSize(input
->Type
) * input
->Size
+ 3)/ 4;
486 if (!input
->BufferObj
->Name
)
488 if (input
->StrideB
== 0)
490 pStreamDesc
->stride
= 0;
/* Byte size of one element, rounded up to a multiple of 4. */
494 pStreamDesc
->stride
= (getTypeSize(pStreamDesc
->type
) * input
->Size
+ 3) & ~3;
497 pStreamDesc
->is_named_bo
= GL_FALSE
;
501 pStreamDesc
->size
= input
->Size
;
502 pStreamDesc
->dst_loc
= context
->nNumActiveAos
;
503 pStreamDesc
->element
= unLoc
;
504 pStreamDesc
->format
= input
->Format
;
/* Signedness and normalization depend on the stream's final type. */
506 switch (pStreamDesc
->type
)
509 pStreamDesc
->_signed
= 0;
510 pStreamDesc
->normalize
= GL_FALSE
;
513 pStreamDesc
->_signed
= 1;
514 pStreamDesc
->normalize
= input
->Normalized
;
517 pStreamDesc
->_signed
= 1;
518 pStreamDesc
->normalize
= input
->Normalized
;
520 case GL_UNSIGNED_SHORT
:
521 pStreamDesc
->_signed
= 0;
522 pStreamDesc
->normalize
= input
->Normalized
;
524 case GL_UNSIGNED_BYTE
:
525 pStreamDesc
->_signed
= 0;
526 pStreamDesc
->normalize
= input
->Normalized
;
530 case GL_UNSIGNED_INT
:
/* The slot is now in use. */
535 context
->nNumActiveAos
++;
/*
 * r700SetVertexFormat: rebuild the context's active stream descriptions for
 * the current vertex program.
 *
 * Resets context->nNumActiveAos to 0, takes the program's InputsRead bitmask
 * (with VERT_BIT_POS added for position-invariant programs) into unBit, and
 * calls r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]). Finally the
 * resulting nNumActiveAos is copied to context->radeon.tcl.aos_count.
 *
 * NOTE(review): the loop that walks unBit / unLoc around the
 * r700TranslateAttrib() call is not visible in this listing.
 *
 * NOTE(review): ctx->VertexProgram._Current is cast to
 * struct r700_vertex_program * here, whereas r700SelectVertexShader casts the
 * same pointer to struct r700_vertex_program_cont *. Elsewhere in this file
 * r700_vertex_program::mesa_program is used as a pointer
 * (vp->mesa_program->Base), so &(vpc->mesa_program) here only yields the
 * program itself if it is the address of the object's first member --
 * confirm against the struct definitions.
 */
538 void r700SetVertexFormat(GLcontext
*ctx
, const struct gl_client_array
*arrays
[], int count
)
540 context_t
*context
= R700_CONTEXT(ctx
);
541 struct r700_vertex_program
*vpc
542 = (struct r700_vertex_program
*)ctx
->VertexProgram
._Current
;
544 struct gl_vertex_program
* mesa_vp
= (struct gl_vertex_program
*)&(vpc
->mesa_program
);
545 unsigned int unLoc
= 0;
546 unsigned int unBit
= mesa_vp
->Base
.InputsRead
;
/* Start from an empty stream list; r700TranslateAttrib appends to it. */
547 context
->nNumActiveAos
= 0;
549 if (mesa_vp
->IsPositionInvariant
)
551 unBit
|= VERT_BIT_POS
;
558 r700TranslateAttrib(ctx
, unLoc
, count
, arrays
[unLoc
]);
564 context
->radeon
.tcl
.aos_count
= context
->nNumActiveAos
;
/*
 * r700GetActiveVpShaderBo: look up a pointer associated with the currently
 * selected vertex program.
 *
 * The visible code only fetches context->selected_vp into vp.
 *
 * NOTE(review): the statements that compute the returned pointer are not
 * visible in this listing; presumably it is the selected program's shader
 * buffer object, or NULL when no program is selected -- confirm against the
 * full file. The doubled ';' after the vp initialiser is an empty statement.
 */
567 void * r700GetActiveVpShaderBo(GLcontext
* ctx
)
569 context_t
*context
= R700_CONTEXT(ctx
);
570 struct r700_vertex_program
*vp
= context
->selected_vp
;;
578 GLboolean
r700SetupVertexProgram(GLcontext
* ctx
)
580 context_t
*context
= R700_CONTEXT(ctx
);
581 R700_CHIP_CONTEXT
*r700
= (R700_CHIP_CONTEXT
*)(&context
->hw
);
582 struct r700_vertex_program
*vp
= context
->selected_vp
;
584 struct gl_program_parameter_list
*paramList
;
585 unsigned int unNumParamData
;
588 if(GL_FALSE
== vp
->loaded
)
590 if(vp
->r700Shader
.bNeedsAssembly
== GL_TRUE
)
592 Assemble( &(vp
->r700Shader
) );
598 (GLvoid
*)(vp
->r700Shader
.pProgram
),
599 vp
->r700Shader
.uShaderBinaryDWORDSize
,
602 vp
->loaded
= GL_TRUE
;
605 DumpHwBinary(DUMP_VERTEX_SHADER
, (GLvoid
*)(vp
->r700Shader
.pProgram
),
606 vp
->r700Shader
.uShaderBinaryDWORDSize
);
608 /* TODO : enable this after MemUse fixed *=
609 (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
612 R600_STATECHANGE(context
, vs
);
613 R600_STATECHANGE(context
, fs
); /* hack */
615 r700
->vs
.SQ_PGM_RESOURCES_VS
.u32All
= 0;
616 SETbit(r700
->vs
.SQ_PGM_RESOURCES_VS
.u32All
, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit
);
618 r700
->vs
.SQ_PGM_START_VS
.u32All
= 0; /* set from buffer object. */
620 SETfield(r700
->vs
.SQ_PGM_RESOURCES_VS
.u32All
, vp
->r700Shader
.nRegs
+ 1,
621 NUM_GPRS_shift
, NUM_GPRS_mask
);
623 if(vp
->r700Shader
.uStackSize
) /* we don't use branch for now, it should be zero. */
625 SETfield(r700
->vs
.SQ_PGM_RESOURCES_VS
.u32All
, vp
->r700Shader
.uStackSize
,
626 STACK_SIZE_shift
, STACK_SIZE_mask
);
629 R600_STATECHANGE(context
, spi
);
631 SETfield(r700
->SPI_VS_OUT_CONFIG
.u32All
,
632 vp
->r700Shader
.nParamExports
? (vp
->r700Shader
.nParamExports
- 1) : 0,
633 VS_EXPORT_COUNT_shift
, VS_EXPORT_COUNT_mask
);
634 SETfield(r700
->SPI_PS_IN_CONTROL_0
.u32All
, vp
->r700Shader
.nParamExports
,
635 NUM_INTERP_shift
, NUM_INTERP_mask
);
638 SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
639 CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
642 /* Send out shader constants. */
643 paramList
= vp
->mesa_program
->Base
.Parameters
;
645 if(NULL
!= paramList
) {
646 /* vp->mesa_program was cloned, not updated by glsl shader api. */
647 /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgram._Current */
648 /* so, use ctx->VertexProgram._Current */
649 struct gl_program_parameter_list
*paramListOrginal
=
650 ctx
->VertexProgram
._Current
->Base
.Parameters
;
652 _mesa_load_state_parameters(ctx
, paramList
);
654 if (paramList
->NumParameters
> R700_MAX_DX9_CONSTS
)
657 R600_STATECHANGE(context
, vs_consts
);
659 r700
->vs
.num_consts
= paramList
->NumParameters
;
661 unNumParamData
= paramList
->NumParameters
;
663 for(ui
=0; ui
<unNumParamData
; ui
++) {
664 if(paramList
->Parameters
[ui
].Type
== PROGRAM_UNIFORM
)
666 r700
->vs
.consts
[ui
][0].f32All
= paramListOrginal
->ParameterValues
[ui
][0];
667 r700
->vs
.consts
[ui
][1].f32All
= paramListOrginal
->ParameterValues
[ui
][1];
668 r700
->vs
.consts
[ui
][2].f32All
= paramListOrginal
->ParameterValues
[ui
][2];
669 r700
->vs
.consts
[ui
][3].f32All
= paramListOrginal
->ParameterValues
[ui
][3];
673 r700
->vs
.consts
[ui
][0].f32All
= paramList
->ParameterValues
[ui
][0];
674 r700
->vs
.consts
[ui
][1].f32All
= paramList
->ParameterValues
[ui
][1];
675 r700
->vs
.consts
[ui
][2].f32All
= paramList
->ParameterValues
[ui
][2];
676 r700
->vs
.consts
[ui
][3].f32All
= paramList
->ParameterValues
[ui
][3];
680 r700
->vs
.num_consts
= 0;
682 COMPILED_SUB
* pCompiledSub
;
684 GLuint unConstOffset
= r700
->vs
.num_consts
;
685 for(ui
=0; ui
<vp
->r700AsmCode
.unNumPresub
; ui
++)
687 pCompiledSub
= vp
->r700AsmCode
.presubs
[ui
].pCompiledSub
;
689 r700
->vs
.num_consts
+= pCompiledSub
->NumParameters
;
691 for(uj
=0; uj
<pCompiledSub
->NumParameters
; uj
++)
693 r700
->vs
.consts
[uj
+ unConstOffset
][0].f32All
= pCompiledSub
->ParameterValues
[uj
][0];
694 r700
->vs
.consts
[uj
+ unConstOffset
][1].f32All
= pCompiledSub
->ParameterValues
[uj
][1];
695 r700
->vs
.consts
[uj
+ unConstOffset
][2].f32All
= pCompiledSub
->ParameterValues
[uj
][2];
696 r700
->vs
.consts
[uj
+ unConstOffset
][3].f32All
= pCompiledSub
->ParameterValues
[uj
][3];
698 unConstOffset
+= pCompiledSub
->NumParameters
;