2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
34 #include "main/imports.h"
35 #include "shader/prog_parameter.h"
36 #include "shader/prog_statevars.h"
38 #include "r600_context.h"
39 #include "r600_cmdbuf.h"
41 #include "r700_fragprog.h"
43 #include "r700_debug.h"
45 //TODO : Validate FP input with VP output.
/*
 * Map_Fragment_Program
 *
 * Assigns register indices for a fragment program and records them in the
 * assembler state pAsm:
 *   - resets pAsm->number_used_registers to 0;
 *   - gives each input flagged in mesa_fp->Base.InputsRead the next free
 *     index through pAsm->uiFP_AttributeMap[];
 *   - records starting_temp_register_number and reserves temporaries;
 *   - maps FRAG_RESULT_COLOR and FRAG_RESULT_DEPTH in uiFP_OutputMap[] and
 *     counts the exports;
 *   - allocates pAsm->pucOutMask and clears it;
 *   - assigns flag_reg_index and uFirstHelpReg.
 *
 * pAsm    : assembler state that receives the mapping.
 * mesa_fp : fragment program whose InputsRead / OutputsWritten flags and
 *           temporary counts are read.
 *
 * NOTE(review): this listing appears to be missing lines (braces, local
 * declarations, some loop headers and assignments), so the exact nesting of
 * the statements below cannot be confirmed from what is visible.
 */
46 void Map_Fragment_Program(r700_AssemblerBase
*pAsm
,
47 struct gl_fragment_program
*mesa_fp
)
53 pAsm
->number_used_registers
= 0;
55 //Input mapping : mesa_fp->Base.InputsRead set the flag, set in
56 //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
57 //MUST match order in Map_Vertex_Output
/* Each fixed input below follows the same pattern: test its bit in
 * InputsRead and, when set, take the next register (post-increment). */
58 unBit
= 1 << FRAG_ATTRIB_WPOS
;
59 if(mesa_fp
->Base
.InputsRead
& unBit
)
61 pAsm
->uiFP_AttributeMap
[FRAG_ATTRIB_WPOS
] = pAsm
->number_used_registers
++;
64 unBit
= 1 << FRAG_ATTRIB_COL0
;
65 if(mesa_fp
->Base
.InputsRead
& unBit
)
67 pAsm
->uiFP_AttributeMap
[FRAG_ATTRIB_COL0
] = pAsm
->number_used_registers
++;
70 unBit
= 1 << FRAG_ATTRIB_COL1
;
71 if(mesa_fp
->Base
.InputsRead
& unBit
)
73 pAsm
->uiFP_AttributeMap
[FRAG_ATTRIB_COL1
] = pAsm
->number_used_registers
++;
76 unBit
= 1 << FRAG_ATTRIB_FOGC
;
77 if(mesa_fp
->Base
.InputsRead
& unBit
)
79 pAsm
->uiFP_AttributeMap
[FRAG_ATTRIB_FOGC
] = pAsm
->number_used_registers
++;
/* Texture coordinate inputs, offset by i from FRAG_ATTRIB_TEX0.
 * NOTE(review): the loop header that drives i is not visible here. */
84 unBit
= 1 << (FRAG_ATTRIB_TEX0
+ i
);
85 if(mesa_fp
->Base
.InputsRead
& unBit
)
87 pAsm
->uiFP_AttributeMap
[FRAG_ATTRIB_TEX0
+ i
] = pAsm
->number_used_registers
++;
91 /* order has been taken care of */
/* Varying inputs, FRAG_ATTRIB_VAR0 up to FRAG_ATTRIB_MAX.
 * NOTE(review): the statement that sets unBit for each i is not visible. */
93 for(i
=FRAG_ATTRIB_VAR0
; i
<FRAG_ATTRIB_MAX
; i
++)
96 if(mesa_fp
->Base
.InputsRead
& unBit
)
98 pAsm
->uiFP_AttributeMap
[i
] = pAsm
->number_used_registers
++;
/* Second treatment of the varyings: matches fragment program varyings to
 * the current vertex program's varyings by name (strcmp) and maps them at
 * number_used_registers + k.
 * NOTE(review): ctx is not a parameter of this function and no declaration
 * of it is visible; presumably this section is compiled out by a
 * preprocessor conditional that is missing from the listing - confirm. */
102 if( (mesa_fp
->Base
.InputsRead
>> FRAG_ATTRIB_VAR0
) > 0 )
104 struct r700_vertex_program_cont
*vpc
=
105 (struct r700_vertex_program_cont
*)ctx
->VertexProgram
._Current
;
106 struct gl_program_parameter_list
* VsVarying
= vpc
->mesa_program
.Base
.Varying
;
107 struct gl_program_parameter_list
* PsVarying
= mesa_fp
->Base
.Varying
;
108 struct gl_program_parameter
* pVsParam
;
109 struct gl_program_parameter
* pPsParam
;
111 GLuint unMaxVarying
= 0;
113 for(i
=0; i
<VsVarying
->NumParameters
; i
++)
115 pAsm
->uiFP_AttributeMap
[i
+ FRAG_ATTRIB_VAR0
] = 0;
118 for(i
=FRAG_ATTRIB_VAR0
; i
<FRAG_ATTRIB_MAX
; i
++)
121 if(mesa_fp
->Base
.InputsRead
& unBit
)
123 j
= i
- FRAG_ATTRIB_VAR0
;
124 pPsParam
= PsVarying
->Parameters
+ j
;
126 for(k
=0; k
<VsVarying
->NumParameters
; k
++)
128 pVsParam
= VsVarying
->Parameters
+ k
;
130 if( strcmp(pPsParam
->Name
, pVsParam
->Name
) == 0)
132 pAsm
->uiFP_AttributeMap
[i
] = pAsm
->number_used_registers
+ k
;
/* NOTE(review): no visible line updates unMaxVarying after its
 * initialisation to 0 - confirm against the full file. */
143 pAsm
->number_used_registers
+= unMaxVarying
+ 1;
147 /* Map temporary registers (GPRs) */
148 pAsm
->starting_temp_register_number
= pAsm
->number_used_registers
;
/* Adds either NumNativeTemporaries or NumTemporaries to the register count.
 * NOTE(review): presumably the second addition is the else branch of this
 * comparison; the else line itself is not visible. */
150 if(mesa_fp
->Base
.NumNativeTemporaries
>= mesa_fp
->Base
.NumTemporaries
)
152 pAsm
->number_used_registers
+= mesa_fp
->Base
.NumNativeTemporaries
;
156 pAsm
->number_used_registers
+= mesa_fp
->Base
.NumTemporaries
;
/* Output mapping: export registers start right after the temporaries. */
160 pAsm
->number_of_exports
= 0;
161 pAsm
->number_of_colorandz_exports
= 0; /* don't include stencil and mask out. */
162 pAsm
->starting_export_register_number
= pAsm
->number_used_registers
;
163 unBit
= 1 << FRAG_RESULT_COLOR
;
164 if(mesa_fp
->Base
.OutputsWritten
& unBit
)
166 pAsm
->uiFP_OutputMap
[FRAG_RESULT_COLOR
] = pAsm
->number_used_registers
++;
167 pAsm
->number_of_exports
++;
168 pAsm
->number_of_colorandz_exports
++;
170 unBit
= 1 << FRAG_RESULT_DEPTH
;
171 if(mesa_fp
->Base
.OutputsWritten
& unBit
)
/* The depth export register is remembered before the counter advances. */
173 pAsm
->depth_export_register_number
= pAsm
->number_used_registers
;
174 pAsm
->uiFP_OutputMap
[FRAG_RESULT_DEPTH
] = pAsm
->number_used_registers
++;
175 pAsm
->number_of_exports
++;
176 pAsm
->number_of_colorandz_exports
++;
177 pAsm
->pR700Shader
->depthIsExported
= 1;
/* One mask byte per export, each cleared to 0.
 * NOTE(review): the MALLOC result is written to on the very next original
 * line with no visible NULL check. */
180 pAsm
->pucOutMask
= (unsigned char*) MALLOC(pAsm
->number_of_exports
);
181 for(ui
=0; ui
<pAsm
->number_of_exports
; ui
++)
183 pAsm
->pucOutMask
[ui
] = 0x0;
/* The flag register takes the next index; helper registers start after it. */
186 pAsm
->flag_reg_index
= pAsm
->number_used_registers
++;
188 pAsm
->uFirstHelpReg
= pAsm
->number_used_registers
;
/*
 * Find_Instruction_Dependencies_fp
 *
 * Builds one InstDeps entry per instruction of mesa_fp and stores the table
 * in fp->r700AsmCode.pInstDeps.
 *
 * Two scratch arrays with NumTemporaries entries are used, both initialised
 * to -1:
 *   puiTEMPwrites[t] - index of the latest instruction seen so far that
 *                      wrote temporary t;
 *   puiTEMPreads[t]  - index of the first instruction that read temporary t.
 *
 * A second pass over the instructions adjusts nDstDep for texture
 * instructions (IsTex) and the instructions they depend on.
 *
 * NOTE(review): this listing appears to be missing lines (braces, local
 * declarations, the source-operand loop header, else lines, the release of
 * the scratch arrays and the return statement), so the nesting and the
 * returned value cannot be confirmed from what is visible.
 */
191 GLboolean
Find_Instruction_Dependencies_fp(struct r700_fragment_program
*fp
,
192 struct gl_fragment_program
*mesa_fp
)
195 GLint
* puiTEMPwrites
;
196 GLint
* puiTEMPreads
;
197 struct prog_instruction
* pILInst
;
199 struct prog_instruction
* texcoord_DepInst
;
/* NOTE(review): the arrays hold GLint but are sized with sizeof(GLuint),
 * and no NULL check on the MALLOC results is visible before they are
 * written. */
202 puiTEMPwrites
= (GLint
*) MALLOC(sizeof(GLuint
)*mesa_fp
->Base
.NumTemporaries
);
203 puiTEMPreads
= (GLint
*) MALLOC(sizeof(GLuint
)*mesa_fp
->Base
.NumTemporaries
);
/* -1 marks "no write / no read recorded yet". */
205 for(i
=0; i
<mesa_fp
->Base
.NumTemporaries
; i
++)
207 puiTEMPwrites
[i
] = -1;
208 puiTEMPreads
[i
] = -1;
/* One InstDeps entry per instruction. */
211 pInstDeps
= (InstDeps
*)MALLOC(sizeof(InstDeps
)*mesa_fp
->Base
.NumInstructions
);
/* First pass: record temporary writes and per-source dependencies. */
213 for(i
=0; i
<mesa_fp
->Base
.NumInstructions
; i
++)
215 pInstDeps
[i
].nDstDep
= -1;
216 pILInst
= &(mesa_fp
->Base
.Instructions
[i
]);
219 if(pILInst
->DstReg
.File
== PROGRAM_TEMPORARY
)
221 //Set lastwrite for the temp
222 puiTEMPwrites
[pILInst
->DstReg
.Index
] = i
;
/* For each source operand j that is a temporary, the dependency is the
 * instruction recorded in puiTEMPwrites for that temporary.
 * NOTE(review): the loop header over j is not visible. */
228 if(pILInst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
)
231 pInstDeps
[i
].nSrcDeps
[j
] = puiTEMPwrites
[pILInst
->SrcReg
[j
].Index
];
/* Only the first read of a temporary is remembered. */
233 if(puiTEMPreads
[pILInst
->SrcReg
[j
].Index
] < 0 )
235 puiTEMPreads
[pILInst
->SrcReg
[j
].Index
] = i
;
/* Presumably the else branch: sources that are not temporaries carry no
 * dependency - confirm against the full file. */
240 pInstDeps
[i
].nSrcDeps
[j
] = -1;
245 fp
->r700AsmCode
.pInstDeps
= pInstDeps
;
247 //Find dep for tex inst
248 for(i
=0; i
<mesa_fp
->Base
.NumInstructions
; i
++)
250 pILInst
= &(mesa_fp
->Base
.Instructions
[i
]);
252 if(GL_TRUE
== IsTex(pILInst
->Opcode
))
253 { //src0 is the tex coord register, src1 is texunit, src2 is textype
254 nDepInstID
= pInstDeps
[i
].nSrcDeps
[0];
/* NOTE(review): nDepInstID can be -1 (see the first pass); presumably a
 * guard on the original lines missing here prevents indexing
 * Instructions[] with it - confirm. */
257 texcoord_DepInst
= &(mesa_fp
->Base
.Instructions
[nDepInstID
]);
258 if(GL_TRUE
== IsAlu(texcoord_DepInst
->Opcode
) )
260 pInstDeps
[nDepInstID
].nDstDep
= i
;
261 pInstDeps
[i
].nDstDep
= i
;
263 else if(GL_TRUE
== IsTex(texcoord_DepInst
->Opcode
) )
265 pInstDeps
[i
].nDstDep
= i
;
271 // make sure that we dont overwrite src used earlier
/* NOTE(review): DstReg.Index is used to index puiTEMPreads without a
 * visible check that the destination is a temporary, and the result may be
 * -1; any guards would be on lines missing from this listing. */
272 nDepInstID
= puiTEMPreads
[pILInst
->DstReg
.Index
];
275 pInstDeps
[i
].nDstDep
= puiTEMPreads
[pILInst
->DstReg
.Index
];
276 texcoord_DepInst
= &(mesa_fp
->Base
.Instructions
[nDepInstID
]);
277 if(GL_TRUE
== IsAlu(texcoord_DepInst
->Opcode
) )
279 pInstDeps
[nDepInstID
].nDstDep
= i
;
/*
 * r700TranslateFragmentShader
 *
 * Translates mesa_fp into the r700 shader held in fp. The visible steps, in
 * order, are: Init_r700_AssemblerBase, Map_Fragment_Program,
 * Find_Instruction_Dependencies_fp, InitShaderProgram, AssembleInstr over
 * all instructions, Process_Fragment_Exports and RelocProgram. Afterwards
 * nRegs, nParamExports and exportMode are filled in on fp->r700Shader and
 * fp->translated is set to GL_TRUE.
 *
 * NOTE(review): what happens when one of the steps yields GL_FALSE, and the
 * value returned at the end, are on lines missing from this listing.
 */
293 GLboolean
r700TranslateFragmentShader(struct r700_fragment_program
*fp
,
294 struct gl_fragment_program
*mesa_fp
)
296 GLuint number_of_colors_exported
;
297 GLboolean z_enabled
= GL_FALSE
;
301 Init_r700_AssemblerBase( SPT_FP
, &(fp
->r700AsmCode
), &(fp
->r700Shader
) );
302 Map_Fragment_Program(&(fp
->r700AsmCode
), mesa_fp
);
304 if( GL_FALSE
== Find_Instruction_Dependencies_fp(fp
, mesa_fp
) )
309 InitShaderProgram(&(fp
->r700AsmCode
));
/* Assemble every instruction, starting at index 0. */
311 if( GL_FALSE
== AssembleInstr(0,
312 mesa_fp
->Base
.NumInstructions
,
313 &(mesa_fp
->Base
.Instructions
[0]),
314 &(fp
->r700AsmCode
)) )
319 if(GL_FALSE
== Process_Fragment_Exports(&(fp
->r700AsmCode
), mesa_fp
->Base
.OutputsWritten
) )
324 if( GL_FALSE
== RelocProgram(&(fp
->r700AsmCode
)) )
/* nRegs is stored as (registers used - 1), or 0 when none are used. */
329 fp
->r700Shader
.nRegs
= (fp
->r700AsmCode
.number_used_registers
== 0) ? 0
330 : (fp
->r700AsmCode
.number_used_registers
- 1);
332 fp
->r700Shader
.nParamExports
= fp
->r700AsmCode
.number_of_exports
;
334 number_of_colors_exported
= fp
->r700AsmCode
.number_of_colorandz_exports
;
/* When depth is written it is taken out of the colour count.
 * NOTE(review): presumably z_enabled is also set here; that assignment is
 * not visible in this listing. */
336 unBit
= 1 << FRAG_RESULT_DEPTH
;
337 if(mesa_fp
->Base
.OutputsWritten
& unBit
)
340 number_of_colors_exported
--;
343 /* illegal to set this to 0 */
/* exportMode packs the colour count shifted left by one with z_enabled in
 * bit 0. */
344 if(number_of_colors_exported
|| z_enabled
)
346 fp
->r700Shader
.exportMode
= number_of_colors_exported
<< 1 | z_enabled
;
/* Presumably the else branch: a non-zero default when nothing is exported -
 * confirm against the full file. */
350 fp
->r700Shader
.exportMode
= (1 << 1);
353 fp
->translated
= GL_TRUE
;
/*
 * r700SelectFragmentShader
 *
 * Takes the current fragment program from ctx->FragmentProgram._Current,
 * sets its assembler's bR6xx flag to 1 when the chip family is below
 * CHIP_FAMILY_RV770, and translates the program if fp->translated is still
 * GL_FALSE.
 *
 * NOTE(review): the value returned by r700TranslateFragmentShader is not
 * used on the visible lines.
 */
358 void r700SelectFragmentShader(GLcontext
*ctx
)
360 context_t
*context
= R700_CONTEXT(ctx
);
361 struct r700_fragment_program
*fp
= (struct r700_fragment_program
*)
362 (ctx
->FragmentProgram
._Current
);
363 if (context
->radeon
.radeonScreen
->chip_family
< CHIP_FAMILY_RV770
)
365 fp
->r700AsmCode
.bR6xx
= 1;
/* Translation happens once per program; translated is set by
 * r700TranslateFragmentShader. */
368 if (GL_FALSE
== fp
->translated
)
369 r700TranslateFragmentShader(fp
, &(fp
->mesa_program
));
/*
 * r700GetActiveFpShaderBo
 *
 * Looks up the current fragment program from ctx->FragmentProgram._Current.
 *
 * NOTE(review): the statement that produces the returned pointer is not
 * visible in this listing; judging by the name it presumably returns the
 * program's shader buffer object - confirm against the full file.
 */
372 void * r700GetActiveFpShaderBo(GLcontext
* ctx
)
374 struct r700_fragment_program
*fp
= (struct r700_fragment_program
*)
375 (ctx
->FragmentProgram
._Current
);
/*
 * r700SetupFragmentProgram
 *
 * Programs the r700 hardware state for the current fragment program
 * (ctx->FragmentProgram._Current). The visible steps are:
 *   - on first use (fp->loaded == GL_FALSE): assemble the shader if it
 *     needs it and mark the program loaded;
 *   - dump the hardware binary with DumpHwBinary;
 *   - set up SQ_PGM_RESOURCES_PS, SQ_PGM_START_PS, SPI_PS_IN_CONTROL_0,
 *     SPI_INPUT_Z and SQ_PGM_EXPORTS_PS;
 *   - set up one SPI_PS_INPUT_CNTL entry per input the program reads;
 *   - update CB_SHADER_CONTROL when the export mask changes;
 *   - copy the program parameters into r700->ps.consts.
 *
 * NOTE(review): this listing appears to be missing lines (braces, several
 * local declarations, else lines, some loop headers, part of one call and
 * the return statement), so nesting and the returned value cannot be
 * confirmed from what is visible.
 */
380 GLboolean
r700SetupFragmentProgram(GLcontext
* ctx
)
382 context_t
*context
= R700_CONTEXT(ctx
);
383 R700_CHIP_CONTEXT
*r700
= (R700_CHIP_CONTEXT
*)(&context
->hw
);
384 struct r700_fragment_program
*fp
= (struct r700_fragment_program
*)
385 (ctx
->FragmentProgram
._Current
);
386 r700_AssemblerBase
*pAsm
= &(fp
->r700AsmCode
);
387 struct gl_fragment_program
*mesa_fp
= &(fp
->mesa_program
);
388 struct gl_program_parameter_list
*paramList
;
389 unsigned int unNumParamData
;
391 unsigned int unNumOfReg
;
/* First use of this program: assemble if required, then mark it loaded.
 * NOTE(review): the call that receives pProgram and uShaderBinaryDWORDSize
 * as arguments below is not visible in this listing. */
395 if(GL_FALSE
== fp
->loaded
)
397 if(fp
->r700Shader
.bNeedsAssembly
== GL_TRUE
)
399 Assemble( &(fp
->r700Shader
) );
405 (GLvoid
*)(fp
->r700Shader
.pProgram
),
406 fp
->r700Shader
.uShaderBinaryDWORDSize
,
409 fp
->loaded
= GL_TRUE
;
/* NOTE(review): the block comment opened a few lines below (the MemUse
 * TODO) has no visible terminator in this listing; its closing line appears
 * to be missing. */
412 DumpHwBinary(DUMP_PIXEL_SHADER
, (GLvoid
*)(fp
->r700Shader
.pProgram
),
413 fp
->r700Shader
.uShaderBinaryDWORDSize
);
415 /* TODO : enable this after MemUse fixed *=
416 (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
419 R600_STATECHANGE(context
, ps
);
421 r700
->ps
.SQ_PGM_RESOURCES_PS
.u32All
= 0;
422 SETbit(r700
->ps
.SQ_PGM_RESOURCES_PS
.u32All
, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit
);
424 r700
->ps
.SQ_PGM_START_PS
.u32All
= 0; /* set from buffer obj */
426 R600_STATECHANGE(context
, spi
);
/* Register count for the shader: nRegs + 1. */
428 unNumOfReg
= fp
->r700Shader
.nRegs
+ 1;
/* Current NUM_INTERP field value extracted from SPI_PS_IN_CONTROL_0. */
430 ui
= (r700
->SPI_PS_IN_CONTROL_0
.u32All
& NUM_INTERP_mask
) / (1 << NUM_INTERP_shift
);
432 /* PS uses fragment.position */
/* NOTE(review): presumably ui is adjusted before being written back to
 * NUM_INTERP, and the CLEARbit pair is the else branch; those lines are
 * not visible. */
433 if (mesa_fp
->Base
.InputsRead
& (1 << FRAG_ATTRIB_WPOS
))
436 SETfield(r700
->SPI_PS_IN_CONTROL_0
.u32All
, ui
, NUM_INTERP_shift
, NUM_INTERP_mask
);
437 SETfield(r700
->SPI_PS_IN_CONTROL_0
.u32All
, CENTERS_ONLY
, BARYC_SAMPLE_CNTL_shift
, BARYC_SAMPLE_CNTL_mask
);
438 SETbit(r700
->SPI_PS_IN_CONTROL_0
.u32All
, POSITION_ENA_bit
);
439 SETbit(r700
->SPI_INPUT_Z
.u32All
, PROVIDE_Z_TO_SPI_bit
);
443 CLEARbit(r700
->SPI_PS_IN_CONTROL_0
.u32All
, POSITION_ENA_bit
);
444 CLEARbit(r700
->SPI_INPUT_Z
.u32All
, PROVIDE_Z_TO_SPI_bit
);
/* NUM_GPRS receives the larger of unNumOfReg and ui. */
447 ui
= (unNumOfReg
< ui
) ? ui
: unNumOfReg
;
449 SETfield(r700
->ps
.SQ_PGM_RESOURCES_PS
.u32All
, ui
, NUM_GPRS_shift
, NUM_GPRS_mask
);
451 CLEARbit(r700
->ps
.SQ_PGM_RESOURCES_PS
.u32All
, UNCACHED_FIRST_INST_bit
);
453 if(fp
->r700Shader
.uStackSize
) /* we don't use branch for now, it should be zero. */
455 SETfield(r700
->ps
.SQ_PGM_RESOURCES_PS
.u32All
, fp
->r700Shader
.uStackSize
,
456 STACK_SIZE_shift
, STACK_SIZE_mask
);
459 SETfield(r700
->ps
.SQ_PGM_EXPORTS_PS
.u32All
, fp
->r700Shader
.exportMode
,
460 EXPORT_MODE_shift
, EXPORT_MODE_mask
);
/* Per-input SPI_PS_INPUT_CNTL setup. For every input the program reads, its
 * mapped register index (uiFP_AttributeMap) selects the array entry and is
 * also written to the SEMANTIC field; SEL_CENTROID_bit is set, and
 * FLAT_SHADE_bit follows FLAT_SHADE_ENA_bit of SPI_INTERP_CONTROL_0.
 * NOTE(review): the else lines between the SETbit / CLEARbit pairs for
 * FLAT_SHADE_bit are not visible. */
463 unBit
= 1 << FRAG_ATTRIB_WPOS
;
464 if(mesa_fp
->Base
.InputsRead
& unBit
)
466 ui
= pAsm
->uiFP_AttributeMap
[FRAG_ATTRIB_WPOS
];
467 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, SEL_CENTROID_bit
);
468 SETfield(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, ui
,
469 SEMANTIC_shift
, SEMANTIC_mask
);
470 if (r700
->SPI_INTERP_CONTROL_0
.u32All
& FLAT_SHADE_ENA_bit
)
471 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
473 CLEARbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
476 unBit
= 1 << FRAG_ATTRIB_COL0
;
477 if(mesa_fp
->Base
.InputsRead
& unBit
)
479 ui
= pAsm
->uiFP_AttributeMap
[FRAG_ATTRIB_COL0
];
480 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, SEL_CENTROID_bit
);
481 SETfield(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, ui
,
482 SEMANTIC_shift
, SEMANTIC_mask
);
483 if (r700
->SPI_INTERP_CONTROL_0
.u32All
& FLAT_SHADE_ENA_bit
)
484 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
486 CLEARbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
489 unBit
= 1 << FRAG_ATTRIB_COL1
;
490 if(mesa_fp
->Base
.InputsRead
& unBit
)
492 ui
= pAsm
->uiFP_AttributeMap
[FRAG_ATTRIB_COL1
];
493 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, SEL_CENTROID_bit
);
494 SETfield(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, ui
,
495 SEMANTIC_shift
, SEMANTIC_mask
);
496 if (r700
->SPI_INTERP_CONTROL_0
.u32All
& FLAT_SHADE_ENA_bit
)
497 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
499 CLEARbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
502 unBit
= 1 << FRAG_ATTRIB_FOGC
;
503 if(mesa_fp
->Base
.InputsRead
& unBit
)
505 ui
= pAsm
->uiFP_AttributeMap
[FRAG_ATTRIB_FOGC
];
506 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, SEL_CENTROID_bit
);
507 SETfield(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, ui
,
508 SEMANTIC_shift
, SEMANTIC_mask
);
509 if (r700
->SPI_INTERP_CONTROL_0
.u32All
& FLAT_SHADE_ENA_bit
)
510 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
512 CLEARbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
/* Texture coordinate inputs: unlike the inputs above, FLAT_SHADE_bit is
 * cleared without testing FLAT_SHADE_ENA_bit.
 * NOTE(review): the loop header that drives i is not visible here. */
517 unBit
= 1 << (FRAG_ATTRIB_TEX0
+ i
);
518 if(mesa_fp
->Base
.InputsRead
& unBit
)
520 ui
= pAsm
->uiFP_AttributeMap
[FRAG_ATTRIB_TEX0
+ i
];
521 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, SEL_CENTROID_bit
);
522 SETfield(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, ui
,
523 SEMANTIC_shift
, SEMANTIC_mask
);
524 CLEARbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
/* Varying inputs, FRAG_ATTRIB_VAR0 up to FRAG_ATTRIB_MAX.
 * NOTE(review): the statement that sets unBit for each i is not visible. */
528 for(i
=FRAG_ATTRIB_VAR0
; i
<FRAG_ATTRIB_MAX
; i
++)
531 if(mesa_fp
->Base
.InputsRead
& unBit
)
533 ui
= pAsm
->uiFP_AttributeMap
[i
];
534 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, SEL_CENTROID_bit
);
535 SETfield(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, ui
,
536 SEMANTIC_shift
, SEMANTIC_mask
);
537 if (r700
->SPI_INTERP_CONTROL_0
.u32All
& FLAT_SHADE_ENA_bit
)
538 SETbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
540 CLEARbit(r700
->SPI_PS_INPUT_CNTL
[ui
].u32All
, FLAT_SHADE_bit
);
/* CB_SHADER_CONTROL is set to a mask with the low exportCount bits set,
 * where exportCount is the raw EXPORT_MODE field value; the cb state is
 * only flagged as changed when the register value actually differs. */
544 exportCount
= (r700
->ps
.SQ_PGM_EXPORTS_PS
.u32All
& EXPORT_MODE_mask
) / (1 << EXPORT_MODE_shift
);
545 if (r700
->CB_SHADER_CONTROL
.u32All
!= ((1 << exportCount
) - 1))
547 R600_STATECHANGE(context
, cb
);
548 r700
->CB_SHADER_CONTROL
.u32All
= (1 << exportCount
) - 1;
551 /* sent out shader constants. */
552 paramList
= fp
->mesa_program
.Base
.Parameters
;
554 if(NULL
!= paramList
) {
555 _mesa_load_state_parameters(ctx
, paramList
);
/* NOTE(review): the action taken when the parameter count exceeds
 * R700_MAX_DX9_CONSTS is not visible in this listing. */
557 if (paramList
->NumParameters
> R700_MAX_DX9_CONSTS
)
560 R600_STATECHANGE(context
, ps_consts
);
562 r700
->ps
.num_consts
= paramList
->NumParameters
;
564 unNumParamData
= paramList
->NumParameters
;
/* Copy the four components of every parameter into the PS constants. */
566 for(ui
=0; ui
<unNumParamData
; ui
++) {
567 r700
->ps
.consts
[ui
][0].f32All
= paramList
->ParameterValues
[ui
][0];
568 r700
->ps
.consts
[ui
][1].f32All
= paramList
->ParameterValues
[ui
][1];
569 r700
->ps
.consts
[ui
][2].f32All
= paramList
->ParameterValues
[ui
][2];
570 r700
->ps
.consts
[ui
][3].f32All
= paramList
->ParameterValues
[ui
][3];
/* Presumably the else branch for a NULL parameter list - confirm against
 * the full file. */
573 r700
->ps
.num_consts
= 0;