2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/imports.h"
35 #include "main/glheader.h"
37 #include "r600_context.h"
39 #include "r700_shader.h"
/*
 * r700ShaderInit: entry point that receives the GL context.
 * NOTE(review): no body statements are visible for this definition in this
 * view; presumably a shader-module init hook, judging by the name only.
 */
41 void r700ShaderInit(struct gl_context
* ctx
)
45 void AddInstToList(TypedShaderList
* plstCFInstructions
, R700ShaderInstruction
* pInst
)
47 if(NULL
== plstCFInstructions
->pTail
)
49 plstCFInstructions
->pHead
= pInst
;
50 plstCFInstructions
->pTail
= pInst
;
54 plstCFInstructions
->pTail
->pNextInst
= pInst
;
55 plstCFInstructions
->pTail
= pInst
;
57 pInst
->pNextInst
= NULL
;
59 plstCFInstructions
->uNumOfNode
++;
/*
 * Take pInst out of the list plstCFInstructions: the node count is
 * decremented, the nodes after pInst get their m_uIndex rewritten, and the
 * list's pHead / pTail and the predecessor's pNextInst are re-pointed
 * around pInst.
 *
 * NOTE(review): statements this routine depends on are not present in this
 * view. `ulIndex` is assigned but never declared, `bFound` is declared but
 * never set or tested, nothing advances pPrevInst during the search, and
 * ulIndex is never incremented while renumbering. It is also not visible
 * whether pInst is released here or stays owned by the caller. Confirm
 * against the complete file before changing this block.
 */
62 void TakeInstOutFromList(TypedShaderList
* plstCFInstructions
, R700ShaderInstruction
* pInst
)
65 GLboolean bFound
= GL_FALSE
;
66 R700ShaderInstruction
* pPrevInst
= NULL
;
67 R700ShaderInstruction
* pCurInst
= plstCFInstructions
->pHead
;
69 /* Walk the list to make sure pInst is actually in it. */
70 while(NULL
!= pCurInst
)
79 pCurInst
= pCurInst
->pNextInst
;
/* One node fewer in the list. */
83 plstCFInstructions
->uNumOfNode
--;
/* Starting right after pInst, write ulIndex (seeded from pInst's own
 * index) into m_uIndex of each following node. */
85 pCurInst
= pInst
->pNextInst
;
86 ulIndex
= pInst
->m_uIndex
;
87 while(NULL
!= pCurInst
)
89 pCurInst
->m_uIndex
= ulIndex
;
91 pCurInst
= pCurInst
->pNextInst
;
/* Unlink: move the head past pInst if pInst was first ... */
94 if(plstCFInstructions
->pHead
== pInst
)
96 plstCFInstructions
->pHead
= pInst
->pNextInst
;
/* ... pull the tail back to the predecessor if pInst was last ... */
98 if(plstCFInstructions
->pTail
== pInst
)
100 plstCFInstructions
->pTail
= pPrevInst
;
/* ... and let the predecessor, if there is one, skip over pInst. */
102 if(NULL
!= pPrevInst
)
104 pPrevInst
->pNextInst
= pInst
->pNextInst
;
111 void Init_R700_Shader(R700_Shader
* pShader
)
113 pShader
->Type
= R700_SHADER_INVALID
;
114 pShader
->pProgram
= NULL
;
115 pShader
->bBinaryShader
= GL_FALSE
;
116 pShader
->bFetchShaderRequired
= GL_FALSE
;
117 pShader
->bNeedsAssembly
= GL_FALSE
;
118 pShader
->bLinksDirty
= GL_TRUE
;
119 pShader
->uShaderBinaryDWORDSize
= 0;
121 pShader
->nParamExports
= 0;
122 pShader
->nMemExports
= 0;
123 pShader
->resource
= 0;
125 pShader
->exportMode
= 0;
126 pShader
->depthIsImported
= GL_FALSE
;
128 pShader
->positionVectorIsExported
= GL_FALSE
;
129 pShader
->miscVectorIsExported
= GL_FALSE
;
130 pShader
->renderTargetArrayIndexIsExported
= GL_FALSE
;
131 pShader
->ccDist0VectorIsExported
= GL_FALSE
;
132 pShader
->ccDist1VectorIsExported
= GL_FALSE
;
135 pShader
->depthIsExported
= GL_FALSE
;
136 pShader
->stencilRefIsExported
= GL_FALSE
;
137 pShader
->coverageToMaskIsExported
= GL_FALSE
;
138 pShader
->maskIsExported
= GL_FALSE
;
139 pShader
->killIsUsed
= GL_FALSE
;
141 pShader
->uCFOffset
= 0;
142 pShader
->uStackSize
= 0;
143 pShader
->uMaxCallDepth
= 0;
145 pShader
->bSurfAllocated
= GL_FALSE
;
147 pShader
->lstCFInstructions
.pHead
=NULL
;
148 pShader
->lstCFInstructions
.pTail
=NULL
;
149 pShader
->lstCFInstructions
.uNumOfNode
=0;
150 pShader
->lstALUInstructions
.pHead
=NULL
;
151 pShader
->lstALUInstructions
.pTail
=NULL
;
152 pShader
->lstALUInstructions
.uNumOfNode
=0;
153 pShader
->lstTEXInstructions
.pHead
=NULL
;
154 pShader
->lstTEXInstructions
.pTail
=NULL
;
155 pShader
->lstTEXInstructions
.uNumOfNode
=0;
156 pShader
->lstVTXInstructions
.pHead
=NULL
;
157 pShader
->lstVTXInstructions
.pTail
=NULL
;
158 pShader
->lstVTXInstructions
.uNumOfNode
=0;
161 void SetActiveCFlist(R700_Shader
*pShader
, TypedShaderList
* plstCF
)
163 pShader
->plstCFInstructions_active
= plstCF
;
166 void AddCFInstruction(R700_Shader
*pShader
, R700ControlFlowInstruction
*pCFInst
)
168 R700ControlFlowSXClause
* pSXClause
;
169 R700ControlFlowSMXClause
* pSMXClause
;
171 pCFInst
->m_uIndex
= pShader
->plstCFInstructions_active
->uNumOfNode
;
172 AddInstToList(pShader
->plstCFInstructions_active
,
173 (R700ShaderInstruction
*)pCFInst
);
174 pShader
->uShaderBinaryDWORDSize
+= GetInstructionSize(pCFInst
->m_ShaderInstType
);
178 switch (pCFInst
->m_ShaderInstType
)
180 case SIT_CF_ALL_EXP_SX
:
181 pSXClause
= (R700ControlFlowSXClause
*)pCFInst
;
183 case SIT_CF_ALL_EXP_SMX
:
184 pSMXClause
= (R700ControlFlowSMXClause
*)pCFInst
;
190 if((pSXClause
!= NULL
) && (pSXClause
->m_Word0
.f
.type
== SQ_EXPORT_PARAM
))
192 pShader
->nParamExports
+= pSXClause
->m_Word1
.f
.burst_count
+ 1;
194 else if ((pSMXClause
!= NULL
) && (pSMXClause
->m_Word1
.f
.cf_inst
== SQ_CF_INST_MEM_RING
) &&
195 (pSMXClause
->m_Word0
.f
.type
== SQ_EXPORT_WRITE
|| pSMXClause
->m_Word0
.f
.type
== SQ_EXPORT_WRITE_IND
))
197 pShader
->nMemExports
+= pSMXClause
->m_Word1
.f
.burst_count
+ 1;
200 pShader
->bLinksDirty
= GL_TRUE
;
201 pShader
->bNeedsAssembly
= GL_TRUE
;
206 void AddVTXInstruction(R700_Shader
*pShader
, R700VertexInstruction
*pVTXInst
)
208 pVTXInst
->m_uIndex
= pShader
->lstVTXInstructions
.uNumOfNode
;
209 AddInstToList(&(pShader
->lstVTXInstructions
),
210 (R700ShaderInstruction
*)pVTXInst
);
211 pShader
->uShaderBinaryDWORDSize
+= GetInstructionSize(pVTXInst
->m_ShaderInstType
);
213 if(pVTXInst
->m_ShaderInstType
== SIT_VTX_GENERIC
)
215 R700VertexGenericFetch
* pVTXGenericClause
= (R700VertexGenericFetch
*)pVTXInst
;
216 pShader
->nRegs
= (pShader
->nRegs
< pVTXGenericClause
->m_Word1_GPR
.f
.dst_gpr
) ? pVTXGenericClause
->m_Word1_GPR
.f
.dst_gpr
: pShader
->nRegs
;
219 pShader
->bLinksDirty
= GL_TRUE
;
220 pShader
->bNeedsAssembly
= GL_TRUE
;
222 pVTXInst
->useCount
++;
225 void AddTEXInstruction(R700_Shader
*pShader
, R700TextureInstruction
*pTEXInst
)
227 pTEXInst
->m_uIndex
= pShader
->lstTEXInstructions
.uNumOfNode
;
228 AddInstToList(&(pShader
->lstTEXInstructions
),
229 (R700ShaderInstruction
*)pTEXInst
);
230 pShader
->uShaderBinaryDWORDSize
+= GetInstructionSize(pTEXInst
->m_ShaderInstType
);
232 pShader
->nRegs
= (pShader
->nRegs
< pTEXInst
->m_Word1
.f
.dst_gpr
) ? pTEXInst
->m_Word1
.f
.dst_gpr
: pShader
->nRegs
;
234 pShader
->bLinksDirty
= GL_TRUE
;
235 pShader
->bNeedsAssembly
= GL_TRUE
;
237 pTEXInst
->useCount
++;
240 void AddALUInstruction(R700_Shader
*pShader
, R700ALUInstruction
*pALUInst
)
242 pALUInst
->m_uIndex
= pShader
->lstALUInstructions
.uNumOfNode
;
243 AddInstToList(&(pShader
->lstALUInstructions
),
244 (R700ShaderInstruction
*)pALUInst
);
245 pShader
->uShaderBinaryDWORDSize
+= GetInstructionSize(pALUInst
->m_ShaderInstType
);
247 pShader
->nRegs
= (pShader
->nRegs
< pALUInst
->m_Word1
.f
.dst_gpr
) ? pALUInst
->m_Word1
.f
.dst_gpr
: pShader
->nRegs
;
249 pShader
->bLinksDirty
= GL_TRUE
;
250 pShader
->bNeedsAssembly
= GL_TRUE
;
252 pALUInst
->useCount
++;
255 void ResolveLinks(R700_Shader
*pShader
)
258 R700ShaderInstruction
*pInst
;
259 R700ALUInstruction
*pALUinst
;
260 R700TextureInstruction
*pTEXinst
;
261 R700VertexInstruction
*pVTXinst
;
265 GLuint cfOffset
= 0x0;
267 GLuint aluOffset
= cfOffset
+ pShader
->lstCFInstructions
.uNumOfNode
* GetInstructionSize(SIT_CF
);
269 GLuint texOffset
= aluOffset
; // + m_lstALUInstructions.size() * R700ALUInstruction::SIZE,
271 pInst
= pShader
->lstALUInstructions
.pHead
;
274 texOffset
+= GetInstructionSize(pInst
->m_ShaderInstType
);
276 pInst
= pInst
->pNextInst
;
279 vtxOffset
= texOffset
+ pShader
->lstTEXInstructions
.uNumOfNode
* GetInstructionSize(SIT_TEX
);
281 if ( ((pShader
->lstTEXInstructions
.uNumOfNode
> 0) && (texOffset
% 4 != 0)) ||
282 ((pShader
->lstVTXInstructions
.uNumOfNode
> 0) && (vtxOffset
% 4 != 0)) )
284 pALUinst
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
285 Init_R700ALUInstruction(pALUinst
);
286 AddALUInstruction(pShader
, pALUinst
);
287 texOffset
+= GetInstructionSize(SIT_ALU
);
288 vtxOffset
+= GetInstructionSize(SIT_ALU
);
291 pInst
= pShader
->lstALUInstructions
.pHead
;
295 pALUinst
= (R700ALUInstruction
*)pInst
;
297 if(pALUinst
->m_pLinkedALUClause
!= NULL
)
299 // This address is quad-word aligned
300 pALUinst
->m_pLinkedALUClause
->m_Word0
.f
.addr
= (aluOffset
+ uiSize
) >> 1;
303 uiSize
+= GetInstructionSize(pALUinst
->m_ShaderInstType
);
305 pInst
= pInst
->pNextInst
;
308 pInst
= pShader
->lstTEXInstructions
.pHead
;
312 pTEXinst
= (R700TextureInstruction
*)pInst
;
314 if (pTEXinst
->m_pLinkedGenericClause
!= NULL
)
316 pTEXinst
->m_pLinkedGenericClause
->m_Word0
.f
.addr
= (texOffset
+ uiSize
) >> 1;
319 uiSize
+= GetInstructionSize(pTEXinst
->m_ShaderInstType
);
321 pInst
= pInst
->pNextInst
;
324 pInst
= pShader
->lstVTXInstructions
.pHead
;
328 pVTXinst
= (R700VertexInstruction
*)pInst
;
330 if (pVTXinst
->m_pLinkedGenericClause
!= NULL
)
332 pVTXinst
->m_pLinkedGenericClause
->m_Word0
.f
.addr
= (vtxOffset
+ uiSize
) >> 1;
335 uiSize
+= GetInstructionSize(pVTXinst
->m_ShaderInstType
);
337 pInst
= pInst
->pNextInst
;
340 pShader
->bLinksDirty
= GL_FALSE
;
/*
 * Build the shader binary from the instruction lists of pShader.
 *
 * Visible steps: call ResolveLinks() when bLinksDirty is set, MALLOC a
 * buffer of uShaderBinaryDWORDSize GLuints and zero it, copy the words of
 * the CF, ALU, TEX and VTX instructions into it in that order, FREE any
 * previous pShader->pProgram and store the new buffer there, then record
 * uEndOfCF, uEndOfALU and uEndOfFetch and clear bNeedsAssembly.
 *
 * NOTE(review): in this view the declarations of `i` and `pCurrPos`, the
 * list-walking loop headers, several `case` labels and whatever happens
 * under the bBinaryShader test are not visible, and the MALLOC result is
 * used without a visible NULL check. Verify against the complete file.
 */
343 void Assemble(R700_Shader
*pShader
)
346 GLuint
*pShaderBinary
;
347 GLuint size_of_program
;
350 GLuint end_of_cf_instructions
;
351 GLuint number_of_alu_dwords
;
353 R700ShaderInstruction
*pInst
;
355 if(GL_TRUE
== pShader
->bBinaryShader
)
/* Refresh the clause addresses first if anything was added since the
 * last resolve. */
360 if(pShader
->bLinksDirty
== GL_TRUE
)
362 ResolveLinks(pShader
);
/* Allocate the output buffer and clear it. */
365 size_of_program
= pShader
->uShaderBinaryDWORDSize
;
367 pShaderBinary
= (GLuint
*) MALLOC(sizeof(GLuint
)*size_of_program
);
369 pCurrPos
= pShaderBinary
;
371 for (i
= 0; i
< size_of_program
; i
++)
373 pShaderBinary
[i
] = 0;
/* Control-flow instructions: every variant below emits two dwords. */
376 pInst
= pShader
->lstCFInstructions
.pHead
;
379 switch (pInst
->m_ShaderInstType
)
383 R700ControlFlowGenericClause
* pCFgeneric
= (R700ControlFlowGenericClause
*)pInst
;
384 *pCurrPos
++ = pCFgeneric
->m_Word0
.val
;
385 *pCurrPos
++ = pCFgeneric
->m_Word1
.val
;
390 R700ControlFlowALUClause
* pCFalu
= (R700ControlFlowALUClause
*)pInst
;
391 *pCurrPos
++ = pCFalu
->m_Word0
.val
;
392 *pCurrPos
++ = pCFalu
->m_Word1
.val
;
395 case SIT_CF_ALL_EXP_SX
:
397 R700ControlFlowSXClause
* pCFsx
= (R700ControlFlowSXClause
*)pInst
;
398 *pCurrPos
++ = pCFsx
->m_Word0
.val
;
/* The second dword is the OR of the clause's two Word1 views. */
399 *pCurrPos
++ = (pCFsx
->m_Word1
.val
| pCFsx
->m_Word1_SWIZ
.val
);
402 case SIT_CF_ALL_EXP_SMX
:
404 R700ControlFlowSMXClause
* pCFsmx
= (R700ControlFlowSMXClause
*)pInst
;
405 *pCurrPos
++ = pCFsmx
->m_Word0
.val
;
406 *pCurrPos
++ = (pCFsmx
->m_Word1
.val
| pCFsmx
->m_Word1_BUF
.val
);
413 pInst
= pInst
->pNextInst
;
/* ALU instructions; number_of_alu_dwords tallies what gets emitted. */
416 number_of_alu_dwords
= 0;
417 pInst
= pShader
->lstALUInstructions
.pHead
;
420 switch (pInst
->m_ShaderInstType
)
424 R700ALUInstruction
* pALU
= (R700ALUInstruction
*)pInst
;
426 *pCurrPos
++ = pALU
->m_Word0
.val
;
427 *pCurrPos
++ = (pALU
->m_Word1
.val
| pALU
->m_Word1_OP2
.val
| pALU
->m_Word1_OP3
.val
);
429 number_of_alu_dwords
+= 2;
/* Half-literal form: two instruction dwords plus the X and Y literals. */
432 case SIT_ALU_HALF_LIT
:
434 R700ALUInstructionHalfLiteral
* pALUhalf
= (R700ALUInstructionHalfLiteral
*)pInst
;
436 *pCurrPos
++ = pALUhalf
->m_Word0
.val
;
437 *pCurrPos
++ = (pALUhalf
->m_Word1
.val
| pALUhalf
->m_Word1_OP2
.val
| pALUhalf
->m_Word1_OP3
.val
);
/* Literals are copied as raw bit patterns via a GLuint pointer cast. */
438 *pCurrPos
++ = *((GLuint
*)&(pALUhalf
->m_fLiteralX
));
439 *pCurrPos
++ = *((GLuint
*)&(pALUhalf
->m_fLiteralY
));
441 number_of_alu_dwords
+= 4;
/* Full-literal form: two instruction dwords plus X, Y, Z and W literals. */
444 case SIT_ALU_FALL_LIT
:
446 R700ALUInstructionFullLiteral
* pALUfull
= (R700ALUInstructionFullLiteral
*)pInst
;
448 *pCurrPos
++ = pALUfull
->m_Word0
.val
;
449 *pCurrPos
++ = (pALUfull
->m_Word1
.val
| pALUfull
->m_Word1_OP2
.val
| pALUfull
->m_Word1_OP3
.val
);
451 *pCurrPos
++ = *((GLuint
*)&(pALUfull
->m_fLiteralX
));
452 *pCurrPos
++ = *((GLuint
*)&(pALUfull
->m_fLiteralY
));
453 *pCurrPos
++ = *((GLuint
*)&(pALUfull
->m_fLiteralZ
));
454 *pCurrPos
++ = *((GLuint
*)&(pALUfull
->m_fLiteralW
));
456 number_of_alu_dwords
+= 6;
463 pInst
= pInst
->pNextInst
;
/* Texture instructions: three words followed by the constant 0x0beadeaf. */
466 pInst
= pShader
->lstTEXInstructions
.pHead
;
469 R700TextureInstruction
* pTEX
= (R700TextureInstruction
*)pInst
;
471 *pCurrPos
++ = pTEX
->m_Word0
.val
;
472 *pCurrPos
++ = pTEX
->m_Word1
.val
;
473 *pCurrPos
++ = pTEX
->m_Word2
.val
;
474 *pCurrPos
++ = 0x0beadeaf;
476 pInst
= pInst
->pNextInst
;
/* Vertex-fetch instructions: three words followed by the same constant. */
479 pInst
= pShader
->lstVTXInstructions
.pHead
;
482 switch (pInst
->m_ShaderInstType
)
486 R700VertexSemanticFetch
* pVTXsem
= (R700VertexSemanticFetch
*)pInst
;
488 *pCurrPos
++ = pVTXsem
->m_Word0
.val
;
489 *pCurrPos
++ = (pVTXsem
->m_Word1
.val
| pVTXsem
->m_Word1_SEM
.val
);
490 *pCurrPos
++ = pVTXsem
->m_Word2
.val
;
491 *pCurrPos
++ = 0x0beadeaf;
494 case SIT_VTX_GENERIC
: //
496 R700VertexGenericFetch
* pVTXgeneric
= (R700VertexGenericFetch
*)pInst
;
498 *pCurrPos
++ = pVTXgeneric
->m_Word0
.val
;
499 *pCurrPos
++ = (pVTXgeneric
->m_Word1
.val
| pVTXgeneric
->m_Word1_GPR
.val
);
500 *pCurrPos
++ = pVTXgeneric
->m_Word2
.val
;
501 *pCurrPos
++ = 0x0beadeaf;
508 pInst
= pInst
->pNextInst
;
/* Hand the new binary to the shader, releasing any earlier one. */
511 if(NULL
!= pShader
->pProgram
)
513 FREE(pShader
->pProgram
);
515 pShader
->pProgram
= (GLubyte
*)pShaderBinary
;
/* The end markers are stored shifted right by one bit. */
517 end_of_cf_instructions
= pShader
->uCFOffset
+ pShader
->lstCFInstructions
.uNumOfNode
* GetInstructionSize(SIT_CF
);
519 pShader
->uEndOfCF
= end_of_cf_instructions
>> 1;
521 pShader
->uEndOfALU
= (end_of_cf_instructions
+ number_of_alu_dwords
) >> 1;
523 pShader
->uEndOfFetch
= (pShader
->uCFOffset
+ pShader
->uShaderBinaryDWORDSize
) >> 1;
525 pShader
->bNeedsAssembly
= GL_FALSE
;
/*
 * LoadProgram: takes the shader to operate on.
 * NOTE(review): no body statements are visible in this view; the trailing
 * "context" remark presumably marks a context parameter still to be added.
 */
528 void LoadProgram(R700_Shader
*pShader
) //context
/*
 * UpdateShaderRegisters: takes the shader to operate on.
 * NOTE(review): no body statements are visible in this view; the trailing
 * "context" remark presumably marks a context parameter still to be added.
 */
532 void UpdateShaderRegisters(R700_Shader
*pShader
) //context
/*
 * DeleteInstructions: takes the shader to operate on.
 * NOTE(review): no body statements are visible in this view, so what (if
 * anything) is deleted cannot be confirmed from here.
 */
536 void DeleteInstructions(R700_Shader
*pShader
)
/*
 * DebugPrint: parameterless debugging hook.
 * NOTE(review): no body statements are visible in this view.
 */
540 void DebugPrint(void)
544 void cleanup_vfetch_shaderinst(R700_Shader
*pShader
)
546 R700ShaderInstruction
*pInst
;
547 R700ShaderInstruction
*pInstToFree
;
548 R700VertexInstruction
*pVTXInst
;
549 R700ControlFlowInstruction
*pCFInst
;
551 pInst
= pShader
->lstVTXInstructions
.pHead
;
554 pVTXInst
= (R700VertexInstruction
*)pInst
;
555 pShader
->uShaderBinaryDWORDSize
-= GetInstructionSize(pVTXInst
->m_ShaderInstType
);
557 if(NULL
!= pVTXInst
->m_pLinkedGenericClause
)
559 pCFInst
= (R700ControlFlowInstruction
*)(pVTXInst
->m_pLinkedGenericClause
);
561 TakeInstOutFromList(&(pShader
->lstCFInstructions
),
562 (R700ShaderInstruction
*)pCFInst
);
564 pShader
->uShaderBinaryDWORDSize
-= GetInstructionSize(pCFInst
->m_ShaderInstType
);
567 pInst
= pInst
->pNextInst
;
570 //destroy each item in pShader->lstVTXInstructions;
571 pInst
= pShader
->lstVTXInstructions
.pHead
;
575 pInst
= pInst
->pNextInst
;
579 //set NULL pShader->lstVTXInstructions
580 pShader
->lstVTXInstructions
.pHead
=NULL
;
581 pShader
->lstVTXInstructions
.pTail
=NULL
;
582 pShader
->lstVTXInstructions
.uNumOfNode
=0;
585 void Clean_Up_Shader(R700_Shader
*pShader
)
587 if(NULL
!= pShader
->pProgram
)
589 FREE(pShader
->pProgram
);
590 pShader
->pProgram
= NULL
;
593 R700ShaderInstruction
*pInst
;
594 R700ShaderInstruction
*pInstToFree
;
596 pInst
= pShader
->lstCFInstructions
.pHead
;
600 pInst
= pInst
->pNextInst
;
603 pShader
->lstCFInstructions
.pHead
= NULL
;
605 pInst
= pShader
->lstALUInstructions
.pHead
;
609 pInst
= pInst
->pNextInst
;
612 pShader
->lstALUInstructions
.pHead
= NULL
;
614 pInst
= pShader
->lstTEXInstructions
.pHead
;
618 pInst
= pInst
->pNextInst
;
621 pShader
->lstTEXInstructions
.pHead
= NULL
;
623 pInst
= pShader
->lstVTXInstructions
.pHead
;
627 pInst
= pInst
->pNextInst
;
630 pShader
->lstVTXInstructions
.pHead
= NULL
;