Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r600 / r700_shader.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/imports.h"
34
35 #include "main/glheader.h"
36
37 #include "r600_context.h"
38 #include "r700_debug.h"
39
40 #include "r700_shader.h"
41
42 void r700ShaderInit(GLcontext * ctx)
43 {
44 }
45
46 void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst)
47 {
48 if(NULL == plstCFInstructions->pTail)
49 { //first
50 plstCFInstructions->pHead = pInst;
51 plstCFInstructions->pTail = pInst;
52 }
53 else
54 {
55 plstCFInstructions->pTail->pNextInst = pInst;
56 plstCFInstructions->pTail = pInst;
57 }
58 pInst->pNextInst = NULL;
59
60 plstCFInstructions->uNumOfNode++;
61 }
62
63 void Init_R700_Shader(R700_Shader * pShader)
64 {
65 pShader->Type = R700_SHADER_INVALID;
66 pShader->pProgram = NULL;
67 pShader->bBinaryShader = GL_FALSE;
68 pShader->bFetchShaderRequired = GL_FALSE;
69 pShader->bNeedsAssembly = GL_FALSE;
70 pShader->bLinksDirty = GL_TRUE;
71 pShader->uShaderBinaryDWORDSize = 0;
72 pShader->nRegs = 0;
73 pShader->nParamExports = 0;
74 pShader->nMemExports = 0;
75 pShader->resource = 0;
76
77 pShader->exportMode = 0;
78 pShader->depthIsImported = GL_FALSE;
79
80 pShader->positionVectorIsExported = GL_FALSE;
81 pShader->miscVectorIsExported = GL_FALSE;
82 pShader->renderTargetArrayIndexIsExported = GL_FALSE;
83 pShader->ccDist0VectorIsExported = GL_FALSE;
84 pShader->ccDist1VectorIsExported = GL_FALSE;
85
86
87 pShader->depthIsExported = GL_FALSE;
88 pShader->stencilRefIsExported = GL_FALSE;
89 pShader->coverageToMaskIsExported = GL_FALSE;
90 pShader->maskIsExported = GL_FALSE;
91 pShader->killIsUsed = GL_FALSE;
92
93 pShader->uCFOffset = 0;
94 pShader->uStackSize = 0;
95 pShader->uMaxCallDepth = 0;
96
97 pShader->bSurfAllocated = GL_FALSE;
98
99 pShader->lstCFInstructions.pHead=NULL;
100 pShader->lstCFInstructions.pTail=NULL;
101 pShader->lstCFInstructions.uNumOfNode=0;
102 pShader->lstALUInstructions.pHead=NULL;
103 pShader->lstALUInstructions.pTail=NULL;
104 pShader->lstALUInstructions.uNumOfNode=0;
105 pShader->lstTEXInstructions.pHead=NULL;
106 pShader->lstTEXInstructions.pTail=NULL;
107 pShader->lstTEXInstructions.uNumOfNode=0;
108 pShader->lstVTXInstructions.pHead=NULL;
109 pShader->lstVTXInstructions.pTail=NULL;
110 pShader->lstVTXInstructions.uNumOfNode=0;
111 }
112
113 void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst)
114 {
115 R700ControlFlowSXClause* pSXClause;
116 R700ControlFlowSMXClause* pSMXClause;
117
118 pCFInst->m_uIndex = pShader->lstCFInstructions.uNumOfNode;
119 AddInstToList(&(pShader->lstCFInstructions),
120 (R700ShaderInstruction*)pCFInst);
121 pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType);
122
123 pSXClause = NULL;
124 pSMXClause = NULL;
125 switch (pCFInst->m_ShaderInstType)
126 {
127 case SIT_CF_ALL_EXP_SX:
128 pSXClause = (R700ControlFlowSXClause*)pCFInst;
129 break;
130 case SIT_CF_ALL_EXP_SMX:
131 pSMXClause = (R700ControlFlowSMXClause*)pCFInst;
132 break;
133 default:
134 break;
135 };
136
137 if((pSXClause != NULL) && (pSXClause->m_Word0.f.type == SQ_EXPORT_PARAM))
138 {
139 pShader->nParamExports += pSXClause->m_Word1.f.burst_count + 1;
140 }
141 else if ((pSMXClause != NULL) && (pSMXClause->m_Word1.f.cf_inst == SQ_CF_INST_MEM_RING) &&
142 (pSMXClause->m_Word0.f.type == SQ_EXPORT_WRITE || pSMXClause->m_Word0.f.type == SQ_EXPORT_WRITE_IND))
143 {
144 pShader->nMemExports += pSMXClause->m_Word1.f.burst_count + 1;
145 }
146
147 pShader->bLinksDirty = GL_TRUE;
148 pShader->bNeedsAssembly = GL_TRUE;
149
150 pCFInst->useCount++;
151 }
152
153 void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst)
154 {
155 pVTXInst->m_uIndex = pShader->lstVTXInstructions.uNumOfNode;
156 AddInstToList(&(pShader->lstVTXInstructions),
157 (R700ShaderInstruction*)pVTXInst);
158 pShader->uShaderBinaryDWORDSize += GetInstructionSize(pVTXInst->m_ShaderInstType);
159
160 if(pVTXInst->m_ShaderInstType == SIT_VTX_GENERIC)
161 {
162 R700VertexGenericFetch* pVTXGenericClause = (R700VertexGenericFetch*)pVTXInst;
163 pShader->nRegs = (pShader->nRegs < pVTXGenericClause->m_Word1_GPR.f.dst_gpr) ? pVTXGenericClause->m_Word1_GPR.f.dst_gpr : pShader->nRegs;
164 }
165
166 pShader->bLinksDirty = GL_TRUE;
167 pShader->bNeedsAssembly = GL_TRUE;
168
169 pVTXInst->useCount++;
170 }
171
172 void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst)
173 {
174 pTEXInst->m_uIndex = pShader->lstTEXInstructions.uNumOfNode;
175 AddInstToList(&(pShader->lstTEXInstructions),
176 (R700ShaderInstruction*)pTEXInst);
177 pShader->uShaderBinaryDWORDSize += GetInstructionSize(pTEXInst->m_ShaderInstType);
178
179 pShader->nRegs = (pShader->nRegs < pTEXInst->m_Word1.f.dst_gpr) ? pTEXInst->m_Word1.f.dst_gpr : pShader->nRegs;
180
181 pShader->bLinksDirty = GL_TRUE;
182 pShader->bNeedsAssembly = GL_TRUE;
183
184 pTEXInst->useCount++;
185 }
186
187 void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst)
188 {
189 pALUInst->m_uIndex = pShader->lstALUInstructions.uNumOfNode;
190 AddInstToList(&(pShader->lstALUInstructions),
191 (R700ShaderInstruction*)pALUInst);
192 pShader->uShaderBinaryDWORDSize += GetInstructionSize(pALUInst->m_ShaderInstType);
193
194 pShader->nRegs = (pShader->nRegs < pALUInst->m_Word1.f.dst_gpr) ? pALUInst->m_Word1.f.dst_gpr : pShader->nRegs;
195
196 pShader->bLinksDirty = GL_TRUE;
197 pShader->bNeedsAssembly = GL_TRUE;
198
199 pALUInst->useCount++;
200 }
201
202 void ResolveLinks(R700_Shader *pShader)
203 {
204 GLuint uiSize;
205 R700ShaderInstruction *pInst;
206 R700ALUInstruction *pALUinst;
207 R700TextureInstruction *pTEXinst;
208 R700VertexInstruction *pVTXinst;
209
210 GLuint vtxOffset;
211
212 GLuint cfOffset = 0x0;
213
214 GLuint aluOffset = cfOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF);
215
216 GLuint texOffset = aluOffset; // + m_lstALUInstructions.size() * R700ALUInstruction::SIZE,
217
218 pInst = pShader->lstALUInstructions.pHead;
219 while(NULL != pInst)
220 {
221 texOffset += GetInstructionSize(pInst->m_ShaderInstType);
222
223 pInst = pInst->pNextInst;
224 };
225
226 vtxOffset = texOffset + pShader->lstTEXInstructions.uNumOfNode * GetInstructionSize(SIT_TEX);
227
228 if ( ((pShader->lstTEXInstructions.uNumOfNode > 0) && (texOffset % 4 != 0)) ||
229 ((pShader->lstVTXInstructions.uNumOfNode > 0) && (vtxOffset % 4 != 0)) )
230 {
231 pALUinst = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
232 Init_R700ALUInstruction(pALUinst);
233 AddALUInstruction(pShader, pALUinst);
234 texOffset += GetInstructionSize(SIT_ALU);
235 vtxOffset += GetInstructionSize(SIT_ALU);
236 }
237
238 pInst = pShader->lstALUInstructions.pHead;
239 uiSize = 0;
240 while(NULL != pInst)
241 {
242 pALUinst = (R700ALUInstruction*)pInst;
243
244 if(pALUinst->m_pLinkedALUClause != NULL)
245 {
246 // This address is quad-word aligned
247 pALUinst->m_pLinkedALUClause->m_Word0.f.addr = (aluOffset + uiSize) >> 1;
248 }
249
250 uiSize += GetInstructionSize(pALUinst->m_ShaderInstType);
251
252 pInst = pInst->pNextInst;
253 };
254
255 pInst = pShader->lstTEXInstructions.pHead;
256 uiSize = 0;
257 while(NULL != pInst)
258 {
259 pTEXinst = (R700TextureInstruction*)pInst;
260
261 if (pTEXinst->m_pLinkedGenericClause != NULL)
262 {
263 pTEXinst->m_pLinkedGenericClause->m_Word0.f.addr = (texOffset + uiSize) >> 1;
264 }
265
266 uiSize += GetInstructionSize(pTEXinst->m_ShaderInstType);
267
268 pInst = pInst->pNextInst;
269 };
270
271 pInst = pShader->lstVTXInstructions.pHead;
272 uiSize = 0;
273 while(NULL != pInst)
274 {
275 pVTXinst = (R700VertexInstruction*)pInst;
276
277 if (pVTXinst->m_pLinkedGenericClause != NULL)
278 {
279 pVTXinst->m_pLinkedGenericClause->m_Word0.f.addr = (vtxOffset + uiSize) >> 1;
280 }
281
282 uiSize += GetInstructionSize(pVTXinst->m_ShaderInstType);
283
284 pInst = pInst->pNextInst;
285 };
286
287 pShader->bLinksDirty = GL_FALSE;
288 }
289
290 void Assemble(R700_Shader *pShader)
291 {
292 GLuint i;
293 GLuint *pShaderBinary;
294 GLuint size_of_program;
295 GLuint *pCurrPos;
296
297 GLuint end_of_cf_instructions;
298 GLuint number_of_alu_dwords;
299
300 R700ShaderInstruction *pInst;
301
302 if(GL_TRUE == pShader->bBinaryShader)
303 {
304 return;
305 }
306
307 if(pShader->bLinksDirty == GL_TRUE)
308 {
309 ResolveLinks(pShader);
310 }
311
312 size_of_program = pShader->uShaderBinaryDWORDSize;
313
314 pShaderBinary = (GLuint*) MALLOC(sizeof(GLuint)*size_of_program);
315
316 pCurrPos = pShaderBinary;
317
318 for (i = 0; i < size_of_program; i++)
319 {
320 pShaderBinary[i] = 0;
321 }
322
323 pInst = pShader->lstCFInstructions.pHead;
324 while(NULL != pInst)
325 {
326 switch (pInst->m_ShaderInstType)
327 {
328 case SIT_CF_GENERIC:
329 {
330 R700ControlFlowGenericClause* pCFgeneric = (R700ControlFlowGenericClause*)pInst;
331 *pCurrPos++ = pCFgeneric->m_Word0.val;
332 *pCurrPos++ = pCFgeneric->m_Word1.val;
333 }
334 break;
335 case SIT_CF_ALU:
336 {
337 R700ControlFlowALUClause* pCFalu = (R700ControlFlowALUClause*)pInst;
338 *pCurrPos++ = pCFalu->m_Word0.val;
339 *pCurrPos++ = pCFalu->m_Word1.val;
340 }
341 break;
342 case SIT_CF_ALL_EXP_SX:
343 {
344 R700ControlFlowSXClause* pCFsx = (R700ControlFlowSXClause*)pInst;
345 *pCurrPos++ = pCFsx->m_Word0.val;
346 *pCurrPos++ = (pCFsx->m_Word1.val | pCFsx->m_Word1_SWIZ.val);
347 }
348 break;
349 case SIT_CF_ALL_EXP_SMX:
350 {
351 R700ControlFlowSMXClause* pCFsmx = (R700ControlFlowSMXClause*)pInst;
352 *pCurrPos++ = pCFsmx->m_Word0.val;
353 *pCurrPos++ = (pCFsmx->m_Word1.val | pCFsmx->m_Word1_BUF.val);
354 }
355 break;
356 default:
357 break;
358 }
359
360 pInst = pInst->pNextInst;
361 };
362
363 number_of_alu_dwords = 0;
364 pInst = pShader->lstALUInstructions.pHead;
365 while(NULL != pInst)
366 {
367 switch (pInst->m_ShaderInstType)
368 {
369 case SIT_ALU:
370 {
371 R700ALUInstruction* pALU = (R700ALUInstruction*)pInst;
372
373 *pCurrPos++ = pALU->m_Word0.val;
374 *pCurrPos++ = (pALU->m_Word1.val | pALU->m_Word1_OP2.val | pALU->m_Word1_OP3.val);
375
376 number_of_alu_dwords += 2;
377 }
378 break;
379 case SIT_ALU_HALF_LIT:
380 {
381 R700ALUInstructionHalfLiteral* pALUhalf = (R700ALUInstructionHalfLiteral*)pInst;
382
383 *pCurrPos++ = pALUhalf->m_Word0.val;
384 *pCurrPos++ = (pALUhalf->m_Word1.val | pALUhalf->m_Word1_OP2.val | pALUhalf->m_Word1_OP3.val);
385 *pCurrPos++ = *((GLuint*)&(pALUhalf->m_fLiteralX));
386 *pCurrPos++ = *((GLuint*)&(pALUhalf->m_fLiteralY));
387
388 number_of_alu_dwords += 4;
389 }
390 break;
391 case SIT_ALU_FALL_LIT:
392 {
393 R700ALUInstructionFullLiteral* pALUfull = (R700ALUInstructionFullLiteral*)pInst;
394
395 *pCurrPos++ = pALUfull->m_Word0.val;
396 *pCurrPos++ = (pALUfull->m_Word1.val | pALUfull->m_Word1_OP2.val | pALUfull->m_Word1_OP3.val);
397
398 *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralX));
399 *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralY));
400 *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralZ));
401 *pCurrPos++ = *((GLuint*)&(pALUfull->m_fLiteralW));
402
403 number_of_alu_dwords += 6;
404 }
405 break;
406 default:
407 break;
408 }
409
410 pInst = pInst->pNextInst;
411 };
412
413 pInst = pShader->lstTEXInstructions.pHead;
414 while(NULL != pInst)
415 {
416 R700TextureInstruction* pTEX = (R700TextureInstruction*)pInst;
417
418 *pCurrPos++ = pTEX->m_Word0.val;
419 *pCurrPos++ = pTEX->m_Word1.val;
420 *pCurrPos++ = pTEX->m_Word2.val;
421 *pCurrPos++ = 0x0beadeaf;
422
423 pInst = pInst->pNextInst;
424 };
425
426 pInst = pShader->lstVTXInstructions.pHead;
427 while(NULL != pInst)
428 {
429 switch (pInst->m_ShaderInstType)
430 {
431 case SIT_VTX_SEM: //
432 {
433 R700VertexSemanticFetch* pVTXsem = (R700VertexSemanticFetch*)pInst;
434
435 *pCurrPos++ = pVTXsem->m_Word0.val;
436 *pCurrPos++ = (pVTXsem->m_Word1.val | pVTXsem->m_Word1_SEM.val);
437 *pCurrPos++ = pVTXsem->m_Word2.val;
438 *pCurrPos++ = 0x0beadeaf;
439 }
440 break;
441 case SIT_VTX_GENERIC: //
442 {
443 R700VertexGenericFetch* pVTXgeneric = (R700VertexGenericFetch*)pInst;
444
445 *pCurrPos++ = pVTXgeneric->m_Word0.val;
446 *pCurrPos++ = (pVTXgeneric->m_Word1.val | pVTXgeneric->m_Word1_GPR.val);
447 *pCurrPos++ = pVTXgeneric->m_Word2.val;
448 *pCurrPos++ = 0x0beadeaf;
449 }
450 break;
451 default:
452 break;
453 }
454
455 pInst = pInst->pNextInst;
456 };
457
458 if(NULL != pShader->pProgram)
459 {
460 FREE(pShader->pProgram);
461 }
462 pShader->pProgram = (GLubyte*)pShaderBinary;
463
464 end_of_cf_instructions = pShader->uCFOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF);
465
466 pShader->uEndOfCF = end_of_cf_instructions >> 1;
467
468 pShader->uEndOfALU = (end_of_cf_instructions + number_of_alu_dwords) >> 1;
469
470 pShader->uEndOfFetch = (pShader->uCFOffset + pShader->uShaderBinaryDWORDSize) >> 1;
471
472 pShader->bNeedsAssembly = GL_FALSE;
473 }
474
475 void LoadProgram(R700_Shader *pShader) //context
476 {
477 }
478
479 void UpdateShaderRegisters(R700_Shader *pShader) //context
480 {
481 }
482
483 void DeleteInstructions(R700_Shader *pShader)
484 {
485 }
486
/* Placeholder: currently prints nothing. */
void DebugPrint(void)
{
    return;
}
490
491 void Clean_Up_Shader(R700_Shader *pShader)
492 {
493 FREE(pShader->pProgram);
494
495 R700ShaderInstruction *pInst;
496 R700ShaderInstruction *pInstToFree;
497
498 pInst = pShader->lstCFInstructions.pHead;
499 while(NULL != pInst)
500 {
501 pInstToFree = pInst;
502 pInst = pInst->pNextInst;
503 FREE(pInstToFree);
504 };
505 pInst = pShader->lstALUInstructions.pHead;
506 while(NULL != pInst)
507 {
508 pInstToFree = pInst;
509 pInst = pInst->pNextInst;
510 FREE(pInstToFree);
511 };
512 pInst = pShader->lstTEXInstructions.pHead;
513 while(NULL != pInst)
514 {
515 pInstToFree = pInst;
516 pInst = pInst->pNextInst;
517 FREE(pInstToFree);
518 };
519 pInst = pShader->lstVTXInstructions.pHead;
520 while(NULL != pInst)
521 {
522 pInstToFree = pInst;
523 pInst = pInst->pNextInst;
524 FREE(pInstToFree);
525 };
526 }
527