Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r600 / r700_fragprog.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "shader/prog_parameter.h"
36 #include "shader/prog_statevars.h"
37
38 #include "r600_context.h"
39 #include "r600_cmdbuf.h"
40
41 #include "r700_fragprog.h"
42
43 #include "r700_debug.h"
44
45 //TODO : Validate FP input with VP output.
46 void Map_Fragment_Program(r700_AssemblerBase *pAsm,
47 struct gl_fragment_program *mesa_fp)
48 {
49 unsigned int unBit;
50 unsigned int i;
51 GLuint ui;
52
53 pAsm->number_used_registers = 0;
54
55 //Input mapping : mesa_fp->Base.InputsRead set the flag, set in
56 //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
57 //MUST match order in Map_Vertex_Output
58 unBit = 1 << FRAG_ATTRIB_COL0;
59 if(mesa_fp->Base.InputsRead & unBit)
60 {
61 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
62 }
63
64 unBit = 1 << FRAG_ATTRIB_COL1;
65 if(mesa_fp->Base.InputsRead & unBit)
66 {
67 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
68 }
69
70 for(i=0; i<8; i++)
71 {
72 unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
73 if(mesa_fp->Base.InputsRead & unBit)
74 {
75 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
76 }
77 }
78
79 /* Map temporary registers (GPRs) */
80 pAsm->starting_temp_register_number = pAsm->number_used_registers;
81
82 if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
83 {
84 pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
85 }
86 else
87 {
88 pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
89 }
90
91 /* Output mapping */
92 pAsm->number_of_exports = 0;
93 pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
94 pAsm->starting_export_register_number = pAsm->number_used_registers;
95 unBit = 1 << FRAG_RESULT_COLOR;
96 if(mesa_fp->Base.OutputsWritten & unBit)
97 {
98 pAsm->uiFP_OutputMap[FRAG_RESULT_COLOR] = pAsm->number_used_registers++;
99 pAsm->number_of_exports++;
100 pAsm->number_of_colorandz_exports++;
101 }
102 unBit = 1 << FRAG_RESULT_DEPTH;
103 if(mesa_fp->Base.OutputsWritten & unBit)
104 {
105 pAsm->depth_export_register_number = pAsm->number_used_registers;
106 pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++;
107 pAsm->number_of_exports++;
108 pAsm->number_of_colorandz_exports++;
109 }
110
111 pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
112 for(ui=0; ui<pAsm->number_of_exports; ui++)
113 {
114 pAsm->pucOutMask[ui] = 0x0;
115 }
116
117 pAsm->uFirstHelpReg = pAsm->number_used_registers;
118 }
119
120 GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
121 struct gl_fragment_program *mesa_fp)
122 {
123 GLuint i, j;
124 GLint * puiTEMPwrites;
125 struct prog_instruction * pILInst;
126 InstDeps *pInstDeps;
127 struct prog_instruction * texcoord_DepInst;
128 GLint nDepInstID;
129
130 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
131 for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
132 {
133 puiTEMPwrites[i] = -1;
134 }
135
136 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
137
138 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
139 {
140 pInstDeps[i].nDstDep = -1;
141 pILInst = &(mesa_fp->Base.Instructions[i]);
142
143 //Dst
144 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
145 {
146 //Set lastwrite for the temp
147 puiTEMPwrites[pILInst->DstReg.Index] = i;
148 }
149
150 //Src
151 for(j=0; j<3; j++)
152 {
153 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
154 {
155 //Set dep.
156 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
157 }
158 else
159 {
160 pInstDeps[i].nSrcDeps[j] = -1;
161 }
162 }
163 }
164
165 fp->r700AsmCode.pInstDeps = pInstDeps;
166
167 FREE(puiTEMPwrites);
168
169 //Find dep for tex inst
170 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
171 {
172 pILInst = &(mesa_fp->Base.Instructions[i]);
173
174 if(GL_TRUE == IsTex(pILInst->Opcode))
175 { //src0 is the tex coord register, src1 is texunit, src2 is textype
176 nDepInstID = pInstDeps[i].nSrcDeps[0];
177 if(nDepInstID >= 0)
178 {
179 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
180 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
181 {
182 pInstDeps[nDepInstID].nDstDep = i;
183 pInstDeps[i].nDstDep = i;
184 }
185 else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
186 {
187 pInstDeps[i].nDstDep = i;
188 }
189 else
190 { //... other deps?
191 }
192 }
193 }
194 }
195
196 return GL_TRUE;
197 }
198
199 GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
200 struct gl_fragment_program *mesa_fp)
201 {
202 GLuint number_of_colors_exported;
203 GLboolean z_enabled = GL_FALSE;
204 GLuint unBit;
205
206 //Init_Program
207 Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
208 Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp);
209
210 if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
211 {
212 return GL_FALSE;
213 }
214
215 if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions,
216 &(mesa_fp->Base.Instructions[0]),
217 &(fp->r700AsmCode)) )
218 {
219 return GL_FALSE;
220 }
221
222 if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
223 {
224 return GL_FALSE;
225 }
226
227 fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0
228 : (fp->r700AsmCode.number_used_registers - 1);
229
230 fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
231
232 number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;
233
234 unBit = 1 << FRAG_RESULT_DEPTH;
235 if(mesa_fp->Base.OutputsWritten & unBit)
236 {
237 z_enabled = GL_TRUE;
238 number_of_colors_exported--;
239 }
240
241 fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
242
243 fp->translated = GL_TRUE;
244
245 return GL_TRUE;
246 }
247
248 void * r700GetActiveFpShaderBo(GLcontext * ctx)
249 {
250 struct r700_fragment_program *fp = (struct r700_fragment_program *)
251 (ctx->FragmentProgram._Current);
252
253 return fp->shaderbo;
254 }
255
256 GLboolean r700SetupFragmentProgram(GLcontext * ctx)
257 {
258 context_t *context = R700_CONTEXT(ctx);
259 BATCH_LOCALS(&context->radeon);
260
261 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
262
263 struct r700_fragment_program *fp = (struct r700_fragment_program *)
264 (ctx->FragmentProgram._Current);
265
266 struct gl_program_parameter_list *paramList;
267 unsigned int unNumParamData;
268 unsigned int ui;
269
270 unsigned int unNumOfReg;
271
272 if(GL_FALSE == fp->loaded)
273 {
274 if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
275 {
276 Assemble( &(fp->r700Shader) );
277 }
278
279 /* Load fp to gpu */
280 r600EmitShader(ctx,
281 &(fp->shaderbo),
282 (GLvoid *)(fp->r700Shader.pProgram),
283 fp->r700Shader.uShaderBinaryDWORDSize,
284 "FS");
285
286 fp->loaded = GL_TRUE;
287 }
288
289 DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram),
290 fp->r700Shader.uShaderBinaryDWORDSize);
291
292 /* TODO : enable this after MemUse fixed *=
293 (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
294 */
295
296 r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */
297
298 unNumOfReg = fp->r700Shader.nRegs + 1;
299
300 ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
301
302 ui = ui ? ui : unNumOfReg;
303
304 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
305
306 CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
307
308 if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
309 {
310 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
311 STACK_SIZE_shift, STACK_SIZE_mask);
312 }
313
314 SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
315 EXPORT_MODE_shift, EXPORT_MODE_mask);
316
317 if(fp->r700Shader.killIsUsed)
318 {
319 SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
320 }
321 else
322 {
323 CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
324 }
325
326 if(fp->r700Shader.depthIsExported)
327 {
328 SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
329 }
330 else
331 {
332 CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
333 }
334
335 /* sent out shader constants. */
336
337 paramList = fp->mesa_program.Base.Parameters;
338
339 if(NULL != paramList)
340 {
341 _mesa_load_state_parameters(ctx, paramList);
342
343 unNumParamData = paramList->NumParameters * 4;
344
345 BEGIN_BATCH_NO_AUTOSTATE(2 + unNumParamData);
346
347 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData));
348
349 /* assembler map const from very beginning. */
350 R600_OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4);
351
352 unNumParamData = paramList->NumParameters;
353
354 for(ui=0; ui<unNumParamData; ui++)
355 {
356 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][0])));
357 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][1])));
358 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][2])));
359 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][3])));
360 }
361 END_BATCH();
362 COMMIT_BATCH();
363 }
364
365 return GL_TRUE;
366 }
367
368
369