0f549ead9cc63c5283d7638975bca1361ea475a6
[mesa.git] / src / mesa / drivers / dri / r600 / r700_fragprog.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "shader/prog_parameter.h"
36 #include "shader/prog_statevars.h"
37
38 #include "r600_context.h"
39 #include "r600_cmdbuf.h"
40
41 #include "r700_fragprog.h"
42
43 #include "r700_debug.h"
44
45 //TODO : Validate FP input with VP output.
46 void Map_Fragment_Program(r700_AssemblerBase *pAsm,
47 struct gl_fragment_program *mesa_fp)
48 {
49 unsigned int unBit;
50 unsigned int i;
51 GLuint ui;
52
53 pAsm->number_used_registers = 0;
54
55 //Input mapping : mesa_fp->Base.InputsRead set the flag, set in
56 //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
57 //MUST match order in Map_Vertex_Output
58 unBit = 1 << FRAG_ATTRIB_WPOS;
59 if(mesa_fp->Base.InputsRead & unBit)
60 {
61 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++;
62 }
63
64 unBit = 1 << FRAG_ATTRIB_COL0;
65 if(mesa_fp->Base.InputsRead & unBit)
66 {
67 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
68 }
69
70 unBit = 1 << FRAG_ATTRIB_COL1;
71 if(mesa_fp->Base.InputsRead & unBit)
72 {
73 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
74 }
75
76 unBit = 1 << FRAG_ATTRIB_FOGC;
77 if(mesa_fp->Base.InputsRead & unBit)
78 {
79 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
80 }
81
82 for(i=0; i<8; i++)
83 {
84 unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
85 if(mesa_fp->Base.InputsRead & unBit)
86 {
87 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
88 }
89 }
90
91 /* Map temporary registers (GPRs) */
92 pAsm->starting_temp_register_number = pAsm->number_used_registers;
93
94 if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
95 {
96 pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
97 }
98 else
99 {
100 pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
101 }
102
103 /* Output mapping */
104 pAsm->number_of_exports = 0;
105 pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
106 pAsm->starting_export_register_number = pAsm->number_used_registers;
107 unBit = 1 << FRAG_RESULT_COLOR;
108 if(mesa_fp->Base.OutputsWritten & unBit)
109 {
110 pAsm->uiFP_OutputMap[FRAG_RESULT_COLOR] = pAsm->number_used_registers++;
111 pAsm->number_of_exports++;
112 pAsm->number_of_colorandz_exports++;
113 }
114 unBit = 1 << FRAG_RESULT_DEPTH;
115 if(mesa_fp->Base.OutputsWritten & unBit)
116 {
117 pAsm->depth_export_register_number = pAsm->number_used_registers;
118 pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++;
119 pAsm->number_of_exports++;
120 pAsm->number_of_colorandz_exports++;
121 pAsm->pR700Shader->depthIsExported = 1;
122 }
123
124 pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
125 for(ui=0; ui<pAsm->number_of_exports; ui++)
126 {
127 pAsm->pucOutMask[ui] = 0x0;
128 }
129
130 pAsm->uFirstHelpReg = pAsm->number_used_registers;
131 }
132
133 GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
134 struct gl_fragment_program *mesa_fp)
135 {
136 GLuint i, j;
137 GLint * puiTEMPwrites;
138 GLint * puiTEMPreads;
139 struct prog_instruction * pILInst;
140 InstDeps *pInstDeps;
141 struct prog_instruction * texcoord_DepInst;
142 GLint nDepInstID;
143
144 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
145 puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
146
147 for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
148 {
149 puiTEMPwrites[i] = -1;
150 puiTEMPreads[i] = -1;
151 }
152
153 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
154
155 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
156 {
157 pInstDeps[i].nDstDep = -1;
158 pILInst = &(mesa_fp->Base.Instructions[i]);
159
160 //Dst
161 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
162 {
163 //Set lastwrite for the temp
164 puiTEMPwrites[pILInst->DstReg.Index] = i;
165 }
166
167 //Src
168 for(j=0; j<3; j++)
169 {
170 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
171 {
172 //Set dep.
173 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
174 //Set first read
175 if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 )
176 {
177 puiTEMPreads[pILInst->SrcReg[j].Index] = i;
178 }
179 }
180 else
181 {
182 pInstDeps[i].nSrcDeps[j] = -1;
183 }
184 }
185 }
186
187 fp->r700AsmCode.pInstDeps = pInstDeps;
188
189 //Find dep for tex inst
190 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
191 {
192 pILInst = &(mesa_fp->Base.Instructions[i]);
193
194 if(GL_TRUE == IsTex(pILInst->Opcode))
195 { //src0 is the tex coord register, src1 is texunit, src2 is textype
196 nDepInstID = pInstDeps[i].nSrcDeps[0];
197 if(nDepInstID >= 0)
198 {
199 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
200 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
201 {
202 pInstDeps[nDepInstID].nDstDep = i;
203 pInstDeps[i].nDstDep = i;
204 }
205 else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
206 {
207 pInstDeps[i].nDstDep = i;
208 }
209 else
210 { //... other deps?
211 }
212 }
213 // make sure that we dont overwrite src used earlier
214 nDepInstID = puiTEMPreads[pILInst->DstReg.Index];
215 if(nDepInstID < i)
216 {
217 pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index];
218 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
219 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
220 {
221 pInstDeps[nDepInstID].nDstDep = i;
222 }
223
224 }
225
226 }
227 }
228
229 FREE(puiTEMPwrites);
230 FREE(puiTEMPreads);
231
232 return GL_TRUE;
233 }
234
235 GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
236 struct gl_fragment_program *mesa_fp)
237 {
238 GLuint number_of_colors_exported;
239 GLboolean z_enabled = GL_FALSE;
240 GLuint unBit;
241
242 //Init_Program
243 Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
244 Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp);
245
246 if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
247 {
248 return GL_FALSE;
249 }
250
251 if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions,
252 &(mesa_fp->Base.Instructions[0]),
253 &(fp->r700AsmCode)) )
254 {
255 return GL_FALSE;
256 }
257
258 if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
259 {
260 return GL_FALSE;
261 }
262
263 fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0
264 : (fp->r700AsmCode.number_used_registers - 1);
265
266 fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
267
268 number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;
269
270 unBit = 1 << FRAG_RESULT_DEPTH;
271 if(mesa_fp->Base.OutputsWritten & unBit)
272 {
273 z_enabled = GL_TRUE;
274 number_of_colors_exported--;
275 }
276
277 /* illegal to set this to 0 */
278 if(number_of_colors_exported || z_enabled)
279 {
280 fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
281 }
282 else
283 {
284 fp->r700Shader.exportMode = (1 << 1);
285 }
286
287 fp->translated = GL_TRUE;
288
289 return GL_TRUE;
290 }
291
292 void r700SelectFragmentShader(GLcontext *ctx)
293 {
294 context_t *context = R700_CONTEXT(ctx);
295 struct r700_fragment_program *fp = (struct r700_fragment_program *)
296 (ctx->FragmentProgram._Current);
297 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
298 {
299 fp->r700AsmCode.bR6xx = 1;
300 }
301
302 if (GL_FALSE == fp->translated)
303 r700TranslateFragmentShader(fp, &(fp->mesa_program));
304 }
305
306 void * r700GetActiveFpShaderBo(GLcontext * ctx)
307 {
308 struct r700_fragment_program *fp = (struct r700_fragment_program *)
309 (ctx->FragmentProgram._Current);
310
311 return fp->shaderbo;
312 }
313
314 GLboolean r700SetupFragmentProgram(GLcontext * ctx)
315 {
316 context_t *context = R700_CONTEXT(ctx);
317 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
318 struct r700_fragment_program *fp = (struct r700_fragment_program *)
319 (ctx->FragmentProgram._Current);
320 r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
321 struct gl_fragment_program *mesa_fp = &(fp->mesa_program);
322 struct gl_program_parameter_list *paramList;
323 unsigned int unNumParamData;
324 unsigned int ui, i;
325 unsigned int unNumOfReg;
326 unsigned int unBit;
327 GLuint exportCount;
328
329 if(GL_FALSE == fp->loaded)
330 {
331 if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
332 {
333 Assemble( &(fp->r700Shader) );
334 }
335
336 /* Load fp to gpu */
337 r600EmitShader(ctx,
338 &(fp->shaderbo),
339 (GLvoid *)(fp->r700Shader.pProgram),
340 fp->r700Shader.uShaderBinaryDWORDSize,
341 "FS");
342
343 fp->loaded = GL_TRUE;
344 }
345
346 DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram),
347 fp->r700Shader.uShaderBinaryDWORDSize);
348
349 /* TODO : enable this after MemUse fixed *=
350 (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
351 */
352
353 R600_STATECHANGE(context, ps);
354
355 r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0;
356 SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
357
358 r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */
359
360 R600_STATECHANGE(context, spi);
361
362 unNumOfReg = fp->r700Shader.nRegs + 1;
363
364 ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
365
366 /* PS uses fragment.position */
367 if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
368 {
369 ui += 1;
370 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
371 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask);
372 SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
373 SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
374 }
375 else
376 {
377 CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
378 CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
379 }
380
381 ui = (unNumOfReg < ui) ? ui : unNumOfReg;
382
383 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
384
385 CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
386
387 if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
388 {
389 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
390 STACK_SIZE_shift, STACK_SIZE_mask);
391 }
392
393 SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
394 EXPORT_MODE_shift, EXPORT_MODE_mask);
395
396 R600_STATECHANGE(context, db);
397
398 if(fp->r700Shader.killIsUsed)
399 {
400 SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
401 }
402 else
403 {
404 CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
405 }
406
407 if(fp->r700Shader.depthIsExported)
408 {
409 SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
410 }
411 else
412 {
413 CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
414 }
415
416 // emit ps input map
417 unBit = 1 << FRAG_ATTRIB_WPOS;
418 if(mesa_fp->Base.InputsRead & unBit)
419 {
420 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS];
421 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
422 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
423 SEMANTIC_shift, SEMANTIC_mask);
424 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
425 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
426 else
427 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
428 }
429
430 unBit = 1 << FRAG_ATTRIB_COL0;
431 if(mesa_fp->Base.InputsRead & unBit)
432 {
433 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
434 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
435 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
436 SEMANTIC_shift, SEMANTIC_mask);
437 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
438 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
439 else
440 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
441 }
442
443 unBit = 1 << FRAG_ATTRIB_COL1;
444 if(mesa_fp->Base.InputsRead & unBit)
445 {
446 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
447 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
448 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
449 SEMANTIC_shift, SEMANTIC_mask);
450 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
451 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
452 else
453 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
454 }
455
456 unBit = 1 << FRAG_ATTRIB_FOGC;
457 if(mesa_fp->Base.InputsRead & unBit)
458 {
459 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
460 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
461 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
462 SEMANTIC_shift, SEMANTIC_mask);
463 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
464 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
465 else
466 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
467 }
468
469 for(i=0; i<8; i++)
470 {
471 unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
472 if(mesa_fp->Base.InputsRead & unBit)
473 {
474 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
475 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
476 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
477 SEMANTIC_shift, SEMANTIC_mask);
478 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
479 }
480 }
481
482 R600_STATECHANGE(context, cb);
483 exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
484 r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
485
486 /* sent out shader constants. */
487 paramList = fp->mesa_program.Base.Parameters;
488
489 if(NULL != paramList) {
490 _mesa_load_state_parameters(ctx, paramList);
491
492 if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
493 return GL_FALSE;
494
495 R600_STATECHANGE(context, ps_consts);
496
497 r700->ps.num_consts = paramList->NumParameters;
498
499 unNumParamData = paramList->NumParameters;
500
501 for(ui=0; ui<unNumParamData; ui++) {
502 r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
503 r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
504 r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
505 r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
506 }
507 } else
508 r700->ps.num_consts = 0;
509
510 return GL_TRUE;
511 }
512