Merge commit 'nha/r300-compiler-gallium'
[mesa.git] / src / mesa / drivers / dri / r600 / r700_fragprog.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "shader/prog_parameter.h"
36 #include "shader/prog_statevars.h"
37
38 #include "r600_context.h"
39 #include "r600_cmdbuf.h"
40
41 #include "r700_fragprog.h"
42
43 #include "r700_debug.h"
44
45 //TODO : Validate FP input with VP output.
46 void Map_Fragment_Program(r700_AssemblerBase *pAsm,
47 struct gl_fragment_program *mesa_fp)
48 {
49 unsigned int unBit;
50 unsigned int i;
51 GLuint ui;
52
53 pAsm->number_used_registers = 0;
54
55 //Input mapping : mesa_fp->Base.InputsRead set the flag, set in
56 //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
57 //MUST match order in Map_Vertex_Output
58 unBit = 1 << FRAG_ATTRIB_COL0;
59 if(mesa_fp->Base.InputsRead & unBit)
60 {
61 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
62 }
63
64 unBit = 1 << FRAG_ATTRIB_COL1;
65 if(mesa_fp->Base.InputsRead & unBit)
66 {
67 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
68 }
69
70 unBit = 1 << FRAG_ATTRIB_FOGC;
71 if(mesa_fp->Base.InputsRead & unBit)
72 {
73 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
74 }
75
76 for(i=0; i<8; i++)
77 {
78 unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
79 if(mesa_fp->Base.InputsRead & unBit)
80 {
81 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
82 }
83 }
84
85 /* Map temporary registers (GPRs) */
86 pAsm->starting_temp_register_number = pAsm->number_used_registers;
87
88 if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
89 {
90 pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
91 }
92 else
93 {
94 pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
95 }
96
97 /* Output mapping */
98 pAsm->number_of_exports = 0;
99 pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
100 pAsm->starting_export_register_number = pAsm->number_used_registers;
101 unBit = 1 << FRAG_RESULT_COLOR;
102 if(mesa_fp->Base.OutputsWritten & unBit)
103 {
104 pAsm->uiFP_OutputMap[FRAG_RESULT_COLOR] = pAsm->number_used_registers++;
105 pAsm->number_of_exports++;
106 pAsm->number_of_colorandz_exports++;
107 }
108 unBit = 1 << FRAG_RESULT_DEPTH;
109 if(mesa_fp->Base.OutputsWritten & unBit)
110 {
111 pAsm->depth_export_register_number = pAsm->number_used_registers;
112 pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++;
113 pAsm->number_of_exports++;
114 pAsm->number_of_colorandz_exports++;
115 }
116
117 pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
118 for(ui=0; ui<pAsm->number_of_exports; ui++)
119 {
120 pAsm->pucOutMask[ui] = 0x0;
121 }
122
123 pAsm->uFirstHelpReg = pAsm->number_used_registers;
124 }
125
126 GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
127 struct gl_fragment_program *mesa_fp)
128 {
129 GLuint i, j;
130 GLint * puiTEMPwrites;
131 struct prog_instruction * pILInst;
132 InstDeps *pInstDeps;
133 struct prog_instruction * texcoord_DepInst;
134 GLint nDepInstID;
135
136 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
137 for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
138 {
139 puiTEMPwrites[i] = -1;
140 }
141
142 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
143
144 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
145 {
146 pInstDeps[i].nDstDep = -1;
147 pILInst = &(mesa_fp->Base.Instructions[i]);
148
149 //Dst
150 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
151 {
152 //Set lastwrite for the temp
153 puiTEMPwrites[pILInst->DstReg.Index] = i;
154 }
155
156 //Src
157 for(j=0; j<3; j++)
158 {
159 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
160 {
161 //Set dep.
162 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
163 }
164 else
165 {
166 pInstDeps[i].nSrcDeps[j] = -1;
167 }
168 }
169 }
170
171 fp->r700AsmCode.pInstDeps = pInstDeps;
172
173 FREE(puiTEMPwrites);
174
175 //Find dep for tex inst
176 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
177 {
178 pILInst = &(mesa_fp->Base.Instructions[i]);
179
180 if(GL_TRUE == IsTex(pILInst->Opcode))
181 { //src0 is the tex coord register, src1 is texunit, src2 is textype
182 nDepInstID = pInstDeps[i].nSrcDeps[0];
183 if(nDepInstID >= 0)
184 {
185 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
186 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
187 {
188 pInstDeps[nDepInstID].nDstDep = i;
189 pInstDeps[i].nDstDep = i;
190 }
191 else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
192 {
193 pInstDeps[i].nDstDep = i;
194 }
195 else
196 { //... other deps?
197 }
198 }
199 }
200 }
201
202 return GL_TRUE;
203 }
204
205 GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
206 struct gl_fragment_program *mesa_fp)
207 {
208 GLuint number_of_colors_exported;
209 GLboolean z_enabled = GL_FALSE;
210 GLuint unBit;
211
212 //Init_Program
213 Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
214 Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp);
215
216 if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
217 {
218 return GL_FALSE;
219 }
220
221 if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions,
222 &(mesa_fp->Base.Instructions[0]),
223 &(fp->r700AsmCode)) )
224 {
225 return GL_FALSE;
226 }
227
228 if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
229 {
230 return GL_FALSE;
231 }
232
233 fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0
234 : (fp->r700AsmCode.number_used_registers - 1);
235
236 fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
237
238 number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;
239
240 unBit = 1 << FRAG_RESULT_DEPTH;
241 if(mesa_fp->Base.OutputsWritten & unBit)
242 {
243 z_enabled = GL_TRUE;
244 number_of_colors_exported--;
245 }
246
247 fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
248
249 fp->translated = GL_TRUE;
250
251 return GL_TRUE;
252 }
253
254 void * r700GetActiveFpShaderBo(GLcontext * ctx)
255 {
256 struct r700_fragment_program *fp = (struct r700_fragment_program *)
257 (ctx->FragmentProgram._Current);
258
259 return fp->shaderbo;
260 }
261
262 GLboolean r700SetupFragmentProgram(GLcontext * ctx)
263 {
264 context_t *context = R700_CONTEXT(ctx);
265 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
266 BATCH_LOCALS(&context->radeon);
267 struct r700_fragment_program *fp = (struct r700_fragment_program *)
268 (ctx->FragmentProgram._Current);
269 r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
270 struct gl_fragment_program *mesa_fp = &(fp->mesa_program);
271 struct gl_program_parameter_list *paramList;
272 unsigned int unNumParamData;
273 unsigned int ui, i;
274 unsigned int unNumOfReg;
275 unsigned int unBit;
276
277 if(GL_FALSE == fp->loaded)
278 {
279 if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
280 {
281 Assemble( &(fp->r700Shader) );
282 }
283
284 /* Load fp to gpu */
285 r600EmitShader(ctx,
286 &(fp->shaderbo),
287 (GLvoid *)(fp->r700Shader.pProgram),
288 fp->r700Shader.uShaderBinaryDWORDSize,
289 "FS");
290
291 fp->loaded = GL_TRUE;
292 }
293
294 DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram),
295 fp->r700Shader.uShaderBinaryDWORDSize);
296
297 /* TODO : enable this after MemUse fixed *=
298 (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
299 */
300
301 r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */
302
303 unNumOfReg = fp->r700Shader.nRegs + 1;
304
305 ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
306
307 ui = (unNumOfReg < ui) ? ui : unNumOfReg;
308
309 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
310
311 CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
312
313 if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
314 {
315 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
316 STACK_SIZE_shift, STACK_SIZE_mask);
317 }
318
319 SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
320 EXPORT_MODE_shift, EXPORT_MODE_mask);
321
322 if(fp->r700Shader.killIsUsed)
323 {
324 SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
325 }
326 else
327 {
328 CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
329 }
330
331 if(fp->r700Shader.depthIsExported)
332 {
333 SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
334 }
335 else
336 {
337 CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
338 }
339
340 /* sent out shader constants. */
341 paramList = fp->mesa_program.Base.Parameters;
342
343 if(NULL != paramList)
344 {
345 _mesa_load_state_parameters(ctx, paramList);
346
347 unNumParamData = paramList->NumParameters * 4;
348
349 BEGIN_BATCH_NO_AUTOSTATE(2 + unNumParamData);
350
351 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData));
352
353 /* assembler map const from very beginning. */
354 R600_OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4);
355
356 unNumParamData = paramList->NumParameters;
357
358 for(ui=0; ui<unNumParamData; ui++)
359 {
360 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][0])));
361 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][1])));
362 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][2])));
363 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][3])));
364 }
365 END_BATCH();
366 COMMIT_BATCH();
367 }
368
369 // emit ps input map
370 unBit = 1 << FRAG_ATTRIB_COL0;
371 if(mesa_fp->Base.InputsRead & unBit)
372 {
373 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
374 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
375 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
376 SEMANTIC_shift, SEMANTIC_mask);
377 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
378 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
379 else
380 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
381 }
382
383 unBit = 1 << FRAG_ATTRIB_COL1;
384 if(mesa_fp->Base.InputsRead & unBit)
385 {
386 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
387 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
388 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
389 SEMANTIC_shift, SEMANTIC_mask);
390 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
391 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
392 else
393 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
394 }
395
396 unBit = 1 << FRAG_ATTRIB_FOGC;
397 if(mesa_fp->Base.InputsRead & unBit)
398 {
399 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
400 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
401 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
402 SEMANTIC_shift, SEMANTIC_mask);
403 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
404 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
405 else
406 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
407 }
408
409 for(i=0; i<8; i++)
410 {
411 unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
412 if(mesa_fp->Base.InputsRead & unBit)
413 {
414 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
415 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
416 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
417 SEMANTIC_shift, SEMANTIC_mask);
418 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
419 }
420 }
421
422 return GL_TRUE;
423 }
424