2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
33 #include "main/imports.h"
35 #include "main/glheader.h"
37 #include "r600_context.h"
38 #include "r700_debug.h"
40 #include "r700_shader.h"
/* r700ShaderInit: takes a GLcontext pointer.
 * NOTE(review): only the signature is present in this text; no body
 * (not even braces) is visible, so nothing can be said about what it does. */
42 void r700ShaderInit(GLcontext
* ctx
)
46 void AddInstToList(TypedShaderList
* plstCFInstructions
, R700ShaderInstruction
* pInst
)
48 if(NULL
== plstCFInstructions
->pTail
)
50 plstCFInstructions
->pHead
= pInst
;
51 plstCFInstructions
->pTail
= pInst
;
55 plstCFInstructions
->pTail
->pNextInst
= pInst
;
56 plstCFInstructions
->pTail
= pInst
;
58 pInst
->pNextInst
= NULL
;
60 plstCFInstructions
->uNumOfNode
++;
63 void Init_R700_Shader(R700_Shader
* pShader
)
65 pShader
->Type
= R700_SHADER_INVALID
;
66 pShader
->pProgram
= NULL
;
67 pShader
->bBinaryShader
= GL_FALSE
;
68 pShader
->bFetchShaderRequired
= GL_FALSE
;
69 pShader
->bNeedsAssembly
= GL_FALSE
;
70 pShader
->bLinksDirty
= GL_TRUE
;
71 pShader
->uShaderBinaryDWORDSize
= 0;
73 pShader
->nParamExports
= 0;
74 pShader
->nMemExports
= 0;
75 pShader
->resource
= 0;
77 pShader
->exportMode
= 0;
78 pShader
->depthIsImported
= GL_FALSE
;
80 pShader
->positionVectorIsExported
= GL_FALSE
;
81 pShader
->miscVectorIsExported
= GL_FALSE
;
82 pShader
->renderTargetArrayIndexIsExported
= GL_FALSE
;
83 pShader
->ccDist0VectorIsExported
= GL_FALSE
;
84 pShader
->ccDist1VectorIsExported
= GL_FALSE
;
87 pShader
->depthIsExported
= GL_FALSE
;
88 pShader
->stencilRefIsExported
= GL_FALSE
;
89 pShader
->coverageToMaskIsExported
= GL_FALSE
;
90 pShader
->maskIsExported
= GL_FALSE
;
91 pShader
->killIsUsed
= GL_FALSE
;
93 pShader
->uCFOffset
= 0;
94 pShader
->uStackSize
= 0;
95 pShader
->uMaxCallDepth
= 0;
97 pShader
->bSurfAllocated
= GL_FALSE
;
99 pShader
->lstCFInstructions
.pHead
=NULL
;
100 pShader
->lstCFInstructions
.pTail
=NULL
;
101 pShader
->lstCFInstructions
.uNumOfNode
=0;
102 pShader
->lstALUInstructions
.pHead
=NULL
;
103 pShader
->lstALUInstructions
.pTail
=NULL
;
104 pShader
->lstALUInstructions
.uNumOfNode
=0;
105 pShader
->lstTEXInstructions
.pHead
=NULL
;
106 pShader
->lstTEXInstructions
.pTail
=NULL
;
107 pShader
->lstTEXInstructions
.uNumOfNode
=0;
108 pShader
->lstVTXInstructions
.pHead
=NULL
;
109 pShader
->lstVTXInstructions
.pTail
=NULL
;
110 pShader
->lstVTXInstructions
.uNumOfNode
=0;
113 void AddCFInstruction(R700_Shader
*pShader
, R700ControlFlowInstruction
*pCFInst
)
115 R700ControlFlowSXClause
* pSXClause
;
116 R700ControlFlowSMXClause
* pSMXClause
;
118 pCFInst
->m_uIndex
= pShader
->lstCFInstructions
.uNumOfNode
;
119 AddInstToList(&(pShader
->lstCFInstructions
),
120 (R700ShaderInstruction
*)pCFInst
);
121 pShader
->uShaderBinaryDWORDSize
+= GetInstructionSize(pCFInst
->m_ShaderInstType
);
125 switch (pCFInst
->m_ShaderInstType
)
127 case SIT_CF_ALL_EXP_SX
:
128 pSXClause
= (R700ControlFlowSXClause
*)pCFInst
;
130 case SIT_CF_ALL_EXP_SMX
:
131 pSMXClause
= (R700ControlFlowSMXClause
*)pCFInst
;
137 if((pSXClause
!= NULL
) && (pSXClause
->m_Word0
.f
.type
== SQ_EXPORT_PARAM
))
139 pShader
->nParamExports
+= pSXClause
->m_Word1
.f
.burst_count
+ 1;
141 else if ((pSMXClause
!= NULL
) && (pSMXClause
->m_Word1
.f
.cf_inst
== SQ_CF_INST_MEM_RING
) &&
142 (pSMXClause
->m_Word0
.f
.type
== SQ_EXPORT_WRITE
|| pSMXClause
->m_Word0
.f
.type
== SQ_EXPORT_WRITE_IND
))
144 pShader
->nMemExports
+= pSMXClause
->m_Word1
.f
.burst_count
+ 1;
147 pShader
->bLinksDirty
= GL_TRUE
;
148 pShader
->bNeedsAssembly
= GL_TRUE
;
153 void AddVTXInstruction(R700_Shader
*pShader
, R700VertexInstruction
*pVTXInst
)
155 pVTXInst
->m_uIndex
= pShader
->lstVTXInstructions
.uNumOfNode
;
156 AddInstToList(&(pShader
->lstVTXInstructions
),
157 (R700ShaderInstruction
*)pVTXInst
);
158 pShader
->uShaderBinaryDWORDSize
+= GetInstructionSize(pVTXInst
->m_ShaderInstType
);
160 if(pVTXInst
->m_ShaderInstType
== SIT_VTX_GENERIC
)
162 R700VertexGenericFetch
* pVTXGenericClause
= (R700VertexGenericFetch
*)pVTXInst
;
163 pShader
->nRegs
= (pShader
->nRegs
< pVTXGenericClause
->m_Word1_GPR
.f
.dst_gpr
) ? pVTXGenericClause
->m_Word1_GPR
.f
.dst_gpr
: pShader
->nRegs
;
166 pShader
->bLinksDirty
= GL_TRUE
;
167 pShader
->bNeedsAssembly
= GL_TRUE
;
169 pVTXInst
->useCount
++;
172 void AddTEXInstruction(R700_Shader
*pShader
, R700TextureInstruction
*pTEXInst
)
174 pTEXInst
->m_uIndex
= pShader
->lstTEXInstructions
.uNumOfNode
;
175 AddInstToList(&(pShader
->lstTEXInstructions
),
176 (R700ShaderInstruction
*)pTEXInst
);
177 pShader
->uShaderBinaryDWORDSize
+= GetInstructionSize(pTEXInst
->m_ShaderInstType
);
179 pShader
->nRegs
= (pShader
->nRegs
< pTEXInst
->m_Word1
.f
.dst_gpr
) ? pTEXInst
->m_Word1
.f
.dst_gpr
: pShader
->nRegs
;
181 pShader
->bLinksDirty
= GL_TRUE
;
182 pShader
->bNeedsAssembly
= GL_TRUE
;
184 pTEXInst
->useCount
++;
187 void AddALUInstruction(R700_Shader
*pShader
, R700ALUInstruction
*pALUInst
)
189 pALUInst
->m_uIndex
= pShader
->lstALUInstructions
.uNumOfNode
;
190 AddInstToList(&(pShader
->lstALUInstructions
),
191 (R700ShaderInstruction
*)pALUInst
);
192 pShader
->uShaderBinaryDWORDSize
+= GetInstructionSize(pALUInst
->m_ShaderInstType
);
194 pShader
->nRegs
= (pShader
->nRegs
< pALUInst
->m_Word1
.f
.dst_gpr
) ? pALUInst
->m_Word1
.f
.dst_gpr
: pShader
->nRegs
;
196 pShader
->bLinksDirty
= GL_TRUE
;
197 pShader
->bNeedsAssembly
= GL_TRUE
;
199 pALUInst
->useCount
++;
/*
 * ResolveLinks: compute where the ALU, TEX and VTX instruction groups start
 * (in the units returned by GetInstructionSize) and write the resulting
 * addresses into the clauses linked to each instruction, then clear
 * pShader->bLinksDirty.
 *
 * NOTE(review): this text has lost its braces and loop headers, and
 * vtxOffset and uiSize are used below without any visible declaration or
 * reset. The "pInst = pInst->pNextInst" steps are presumably the tails of
 * list-walking loops; confirm against the full file before changing anything.
 */
202 void ResolveLinks(R700_Shader
*pShader
)
205 R700ShaderInstruction
*pInst
;
206 R700ALUInstruction
*pALUinst
;
207 R700TextureInstruction
*pTEXinst
;
208 R700VertexInstruction
*pVTXinst
;
/* Control-flow instructions come first, at offset 0. */
212 GLuint cfOffset
= 0x0;
/* ALU instructions follow the CF block: CF count times the SIT_CF size. */
214 GLuint aluOffset
= cfOffset
+ pShader
->lstCFInstructions
.uNumOfNode
* GetInstructionSize(SIT_CF
);
216 GLuint texOffset
= aluOffset
; // + m_lstALUInstructions.size() * R700ALUInstruction::SIZE,
/* texOffset is advanced by the size of the ALU instructions, taken per
 * instruction type rather than as count * fixed size. */
218 pInst
= pShader
->lstALUInstructions
.pHead
;
221 texOffset
+= GetInstructionSize(pInst
->m_ShaderInstType
);
223 pInst
= pInst
->pNextInst
;
/* VTX instructions follow the TEX block: TEX count times the SIT_TEX size. */
226 vtxOffset
= texOffset
+ pShader
->lstTEXInstructions
.uNumOfNode
* GetInstructionSize(SIT_TEX
);
/* If a non-empty TEX or VTX group would start at an offset that is not a
 * multiple of 4, one extra freshly initialised ALU instruction is appended
 * and both offsets are moved by the SIT_ALU size.
 * NOTE(review): the CALLOC_STRUCT result is passed on without a NULL check. */
228 if ( ((pShader
->lstTEXInstructions
.uNumOfNode
> 0) && (texOffset
% 4 != 0)) ||
229 ((pShader
->lstVTXInstructions
.uNumOfNode
> 0) && (vtxOffset
% 4 != 0)) )
231 pALUinst
= (R700ALUInstruction
*) CALLOC_STRUCT(R700ALUInstruction
);
232 Init_R700ALUInstruction(pALUinst
);
233 AddALUInstruction(pShader
, pALUinst
);
234 texOffset
+= GetInstructionSize(SIT_ALU
);
235 vtxOffset
+= GetInstructionSize(SIT_ALU
);
/* ALU pass: an instruction with a linked ALU clause gets that clause's addr
 * set to (aluOffset + running size) >> 1; uiSize then grows by the
 * instruction's own size. */
238 pInst
= pShader
->lstALUInstructions
.pHead
;
242 pALUinst
= (R700ALUInstruction
*)pInst
;
244 if(pALUinst
->m_pLinkedALUClause
!= NULL
)
246 // This address is quad-word aligned
247 pALUinst
->m_pLinkedALUClause
->m_Word0
.f
.addr
= (aluOffset
+ uiSize
) >> 1;
250 uiSize
+= GetInstructionSize(pALUinst
->m_ShaderInstType
);
252 pInst
= pInst
->pNextInst
;
/* TEX pass: same scheme, relative to texOffset, via m_pLinkedGenericClause. */
255 pInst
= pShader
->lstTEXInstructions
.pHead
;
259 pTEXinst
= (R700TextureInstruction
*)pInst
;
261 if (pTEXinst
->m_pLinkedGenericClause
!= NULL
)
263 pTEXinst
->m_pLinkedGenericClause
->m_Word0
.f
.addr
= (texOffset
+ uiSize
) >> 1;
266 uiSize
+= GetInstructionSize(pTEXinst
->m_ShaderInstType
);
268 pInst
= pInst
->pNextInst
;
/* VTX pass: same scheme, relative to vtxOffset, via m_pLinkedGenericClause. */
271 pInst
= pShader
->lstVTXInstructions
.pHead
;
275 pVTXinst
= (R700VertexInstruction
*)pInst
;
277 if (pVTXinst
->m_pLinkedGenericClause
!= NULL
)
279 pVTXinst
->m_pLinkedGenericClause
->m_Word0
.f
.addr
= (vtxOffset
+ uiSize
) >> 1;
282 uiSize
+= GetInstructionSize(pVTXinst
->m_ShaderInstType
);
284 pInst
= pInst
->pNextInst
;
/* All link addresses are current again. */
287 pShader
->bLinksDirty
= GL_FALSE
;
/*
 * Assemble: serialise the shader's CF, ALU, TEX and VTX instruction lists,
 * in that order, into a newly allocated GLuint buffer of
 * pShader->uShaderBinaryDWORDSize entries, install it as pShader->pProgram
 * (freeing any previous program), record uEndOfCF / uEndOfALU / uEndOfFetch
 * and clear bNeedsAssembly.
 *
 * NOTE(review): this text has lost its braces, loop headers and several case
 * labels, and pCurrPos and i are used below without a visible declaration.
 * Confirm against the full file before changing anything.
 */
290 void Assemble(R700_Shader
*pShader
)
293 GLuint
*pShaderBinary
;
294 GLuint size_of_program
;
297 GLuint end_of_cf_instructions
;
298 GLuint number_of_alu_dwords
;
300 R700ShaderInstruction
*pInst
;
/* NOTE(review): the body of this bBinaryShader check is not visible here;
 * presumably an early exit for pre-built binaries - confirm. */
302 if(GL_TRUE
== pShader
->bBinaryShader
)
/* Link addresses must be up to date before the words are emitted. */
307 if(pShader
->bLinksDirty
== GL_TRUE
)
309 ResolveLinks(pShader
);
312 size_of_program
= pShader
->uShaderBinaryDWORDSize
;
/* NOTE(review): the MALLOC result is used without a NULL check. */
314 pShaderBinary
= (GLuint
*) MALLOC(sizeof(GLuint
)*size_of_program
);
316 pCurrPos
= pShaderBinary
;
/* Zero the whole buffer before writing instruction words. */
318 for (i
= 0; i
< size_of_program
; i
++)
320 pShaderBinary
[i
] = 0;
/* Control-flow instructions: two words each. The export clauses OR a
 * second Word1 view (m_Word1_SWIZ or m_Word1_BUF) into the second word. */
323 pInst
= pShader
->lstCFInstructions
.pHead
;
326 switch (pInst
->m_ShaderInstType
)
330 R700ControlFlowGenericClause
* pCFgeneric
= (R700ControlFlowGenericClause
*)pInst
;
331 *pCurrPos
++ = pCFgeneric
->m_Word0
.val
;
332 *pCurrPos
++ = pCFgeneric
->m_Word1
.val
;
337 R700ControlFlowALUClause
* pCFalu
= (R700ControlFlowALUClause
*)pInst
;
338 *pCurrPos
++ = pCFalu
->m_Word0
.val
;
339 *pCurrPos
++ = pCFalu
->m_Word1
.val
;
342 case SIT_CF_ALL_EXP_SX
:
344 R700ControlFlowSXClause
* pCFsx
= (R700ControlFlowSXClause
*)pInst
;
345 *pCurrPos
++ = pCFsx
->m_Word0
.val
;
346 *pCurrPos
++ = (pCFsx
->m_Word1
.val
| pCFsx
->m_Word1_SWIZ
.val
);
349 case SIT_CF_ALL_EXP_SMX
:
351 R700ControlFlowSMXClause
* pCFsmx
= (R700ControlFlowSMXClause
*)pInst
;
352 *pCurrPos
++ = pCFsmx
->m_Word0
.val
;
353 *pCurrPos
++ = (pCFsmx
->m_Word1
.val
| pCFsmx
->m_Word1_BUF
.val
);
360 pInst
= pInst
->pNextInst
;
/* ALU instructions: two words (Word0, then Word1 ORed with its OP2 and OP3
 * views), followed by 0, 2 or 4 literal words depending on the type.
 * number_of_alu_dwords accumulates the emitted total (2, 4 or 6 each). */
363 number_of_alu_dwords
= 0;
364 pInst
= pShader
->lstALUInstructions
.pHead
;
367 switch (pInst
->m_ShaderInstType
)
371 R700ALUInstruction
* pALU
= (R700ALUInstruction
*)pInst
;
373 *pCurrPos
++ = pALU
->m_Word0
.val
;
374 *pCurrPos
++ = (pALU
->m_Word1
.val
| pALU
->m_Word1_OP2
.val
| pALU
->m_Word1_OP3
.val
);
376 number_of_alu_dwords
+= 2;
379 case SIT_ALU_HALF_LIT
:
381 R700ALUInstructionHalfLiteral
* pALUhalf
= (R700ALUInstructionHalfLiteral
*)pInst
;
383 *pCurrPos
++ = pALUhalf
->m_Word0
.val
;
384 *pCurrPos
++ = (pALUhalf
->m_Word1
.val
| pALUhalf
->m_Word1_OP2
.val
| pALUhalf
->m_Word1_OP3
.val
);
/* The literal fields are copied by reinterpreting their storage as GLuint. */
385 *pCurrPos
++ = *((GLuint
*)&(pALUhalf
->m_fLiteralX
));
386 *pCurrPos
++ = *((GLuint
*)&(pALUhalf
->m_fLiteralY
));
388 number_of_alu_dwords
+= 4;
391 case SIT_ALU_FALL_LIT
:
393 R700ALUInstructionFullLiteral
* pALUfull
= (R700ALUInstructionFullLiteral
*)pInst
;
395 *pCurrPos
++ = pALUfull
->m_Word0
.val
;
396 *pCurrPos
++ = (pALUfull
->m_Word1
.val
| pALUfull
->m_Word1_OP2
.val
| pALUfull
->m_Word1_OP3
.val
);
398 *pCurrPos
++ = *((GLuint
*)&(pALUfull
->m_fLiteralX
));
399 *pCurrPos
++ = *((GLuint
*)&(pALUfull
->m_fLiteralY
));
400 *pCurrPos
++ = *((GLuint
*)&(pALUfull
->m_fLiteralZ
));
401 *pCurrPos
++ = *((GLuint
*)&(pALUfull
->m_fLiteralW
));
403 number_of_alu_dwords
+= 6;
410 pInst
= pInst
->pNextInst
;
/* Texture instructions: three words plus the constant 0x0beadeaf as a
 * fourth word. */
413 pInst
= pShader
->lstTEXInstructions
.pHead
;
416 R700TextureInstruction
* pTEX
= (R700TextureInstruction
*)pInst
;
418 *pCurrPos
++ = pTEX
->m_Word0
.val
;
419 *pCurrPos
++ = pTEX
->m_Word1
.val
;
420 *pCurrPos
++ = pTEX
->m_Word2
.val
;
421 *pCurrPos
++ = 0x0beadeaf;
423 pInst
= pInst
->pNextInst
;
/* Vertex-fetch instructions: three words (Word1 ORed with its SEM or GPR
 * view) plus the same 0x0beadeaf fourth word. */
426 pInst
= pShader
->lstVTXInstructions
.pHead
;
429 switch (pInst
->m_ShaderInstType
)
433 R700VertexSemanticFetch
* pVTXsem
= (R700VertexSemanticFetch
*)pInst
;
435 *pCurrPos
++ = pVTXsem
->m_Word0
.val
;
436 *pCurrPos
++ = (pVTXsem
->m_Word1
.val
| pVTXsem
->m_Word1_SEM
.val
);
437 *pCurrPos
++ = pVTXsem
->m_Word2
.val
;
438 *pCurrPos
++ = 0x0beadeaf;
441 case SIT_VTX_GENERIC
: //
443 R700VertexGenericFetch
* pVTXgeneric
= (R700VertexGenericFetch
*)pInst
;
445 *pCurrPos
++ = pVTXgeneric
->m_Word0
.val
;
446 *pCurrPos
++ = (pVTXgeneric
->m_Word1
.val
| pVTXgeneric
->m_Word1_GPR
.val
);
447 *pCurrPos
++ = pVTXgeneric
->m_Word2
.val
;
448 *pCurrPos
++ = 0x0beadeaf;
455 pInst
= pInst
->pNextInst
;
/* Replace any previously assembled program with the new buffer. */
458 if(NULL
!= pShader
->pProgram
)
460 FREE(pShader
->pProgram
);
462 pShader
->pProgram
= (GLubyte
*)pShaderBinary
;
/* The end markers are stored shifted right by one, i.e. halved. */
464 end_of_cf_instructions
= pShader
->uCFOffset
+ pShader
->lstCFInstructions
.uNumOfNode
* GetInstructionSize(SIT_CF
);
466 pShader
->uEndOfCF
= end_of_cf_instructions
>> 1;
468 pShader
->uEndOfALU
= (end_of_cf_instructions
+ number_of_alu_dwords
) >> 1;
470 pShader
->uEndOfFetch
= (pShader
->uCFOffset
+ pShader
->uShaderBinaryDWORDSize
) >> 1;
472 pShader
->bNeedsAssembly
= GL_FALSE
;
/* LoadProgram: takes the shader to operate on.
 * NOTE(review): only the signature is present in this text; no body is
 * visible, so its behaviour cannot be described from here. */
475 void LoadProgram(R700_Shader
*pShader
) //context
/* UpdateShaderRegisters: takes the shader to operate on.
 * NOTE(review): only the signature is present in this text; no body is
 * visible, so its behaviour cannot be described from here. */
479 void UpdateShaderRegisters(R700_Shader
*pShader
) //context
/* DeleteInstructions: takes the shader to operate on.
 * NOTE(review): only the signature is present in this text; no body is
 * visible, so its behaviour cannot be described from here. */
483 void DeleteInstructions(R700_Shader
*pShader
)
/* DebugPrint: takes no arguments.
 * NOTE(review): only the signature is present in this text; no body is
 * visible, so its behaviour cannot be described from here. */
487 void DebugPrint(void)
/*
 * Clean_Up_Shader: free the assembled program buffer (pShader->pProgram) and
 * step through the CF, ALU, TEX and VTX instruction lists from each list's
 * pHead.
 *
 * NOTE(review): pInstToFree is declared, but no visible line assigns or frees
 * it, and the loop headers are missing from this text. Presumably each node
 * is released while walking the lists - confirm against the full file. The
 * function may also continue past the end of this excerpt.
 * NOTE(review): neither pProgram nor the list heads are reset here, so the
 * shader must not be reused after this call without re-initialising it.
 */
491 void Clean_Up_Shader(R700_Shader
*pShader
)
493 FREE(pShader
->pProgram
);
495 R700ShaderInstruction
*pInst
;
496 R700ShaderInstruction
*pInstToFree
;
/* Control-flow list. */
498 pInst
= pShader
->lstCFInstructions
.pHead
;
502 pInst
= pInst
->pNextInst
;
/* ALU list. */
505 pInst
= pShader
->lstALUInstructions
.pHead
;
509 pInst
= pInst
->pNextInst
;
/* Texture list. */
512 pInst
= pShader
->lstTEXInstructions
.pHead
;
516 pInst
= pInst
->pNextInst
;
/* Vertex-fetch list. */
519 pInst
= pShader
->lstVTXInstructions
.pHead
;
523 pInst
= pInst
->pNextInst
;