r300g: Use radeon compiler for fragment programs
[mesa.git] / src / mesa / drivers / dri / r600 / r700_fragprog.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "shader/prog_parameter.h"
36 #include "shader/prog_statevars.h"
37
38 #include "r600_context.h"
39 #include "r600_cmdbuf.h"
40
41 #include "r700_fragprog.h"
42
43 #include "r700_debug.h"
44
45 //TODO : Validate FP input with VP output.
46 void Map_Fragment_Program(r700_AssemblerBase *pAsm,
47 struct gl_fragment_program *mesa_fp)
48 {
49 unsigned int unBit;
50 unsigned int i;
51 GLuint ui;
52
53 pAsm->number_used_registers = 0;
54
55 //Input mapping : mesa_fp->Base.InputsRead set the flag, set in
56 //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
57 //MUST match order in Map_Vertex_Output
58 unBit = 1 << FRAG_ATTRIB_COL0;
59 if(mesa_fp->Base.InputsRead & unBit)
60 {
61 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
62 }
63
64 unBit = 1 << FRAG_ATTRIB_COL1;
65 if(mesa_fp->Base.InputsRead & unBit)
66 {
67 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
68 }
69
70 unBit = 1 << FRAG_ATTRIB_FOGC;
71 if(mesa_fp->Base.InputsRead & unBit)
72 {
73 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
74 }
75
76 for(i=0; i<8; i++)
77 {
78 unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
79 if(mesa_fp->Base.InputsRead & unBit)
80 {
81 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
82 }
83 }
84
85 /* Map temporary registers (GPRs) */
86 pAsm->starting_temp_register_number = pAsm->number_used_registers;
87
88 if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
89 {
90 pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
91 }
92 else
93 {
94 pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
95 }
96
97 /* Output mapping */
98 pAsm->number_of_exports = 0;
99 pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
100 pAsm->starting_export_register_number = pAsm->number_used_registers;
101 unBit = 1 << FRAG_RESULT_COLOR;
102 if(mesa_fp->Base.OutputsWritten & unBit)
103 {
104 pAsm->uiFP_OutputMap[FRAG_RESULT_COLOR] = pAsm->number_used_registers++;
105 pAsm->number_of_exports++;
106 pAsm->number_of_colorandz_exports++;
107 }
108 unBit = 1 << FRAG_RESULT_DEPTH;
109 if(mesa_fp->Base.OutputsWritten & unBit)
110 {
111 pAsm->depth_export_register_number = pAsm->number_used_registers;
112 pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++;
113 pAsm->number_of_exports++;
114 pAsm->number_of_colorandz_exports++;
115 }
116
117 pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
118 for(ui=0; ui<pAsm->number_of_exports; ui++)
119 {
120 pAsm->pucOutMask[ui] = 0x0;
121 }
122
123 pAsm->uFirstHelpReg = pAsm->number_used_registers;
124 }
125
126 GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
127 struct gl_fragment_program *mesa_fp)
128 {
129 GLuint i, j;
130 GLint * puiTEMPwrites;
131 struct prog_instruction * pILInst;
132 InstDeps *pInstDeps;
133 struct prog_instruction * texcoord_DepInst;
134 GLint nDepInstID;
135
136 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
137 for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
138 {
139 puiTEMPwrites[i] = -1;
140 }
141
142 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
143
144 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
145 {
146 pInstDeps[i].nDstDep = -1;
147 pILInst = &(mesa_fp->Base.Instructions[i]);
148
149 //Dst
150 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
151 {
152 //Set lastwrite for the temp
153 puiTEMPwrites[pILInst->DstReg.Index] = i;
154 }
155
156 //Src
157 for(j=0; j<3; j++)
158 {
159 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
160 {
161 //Set dep.
162 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
163 }
164 else
165 {
166 pInstDeps[i].nSrcDeps[j] = -1;
167 }
168 }
169 }
170
171 fp->r700AsmCode.pInstDeps = pInstDeps;
172
173 FREE(puiTEMPwrites);
174
175 //Find dep for tex inst
176 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
177 {
178 pILInst = &(mesa_fp->Base.Instructions[i]);
179
180 if(GL_TRUE == IsTex(pILInst->Opcode))
181 { //src0 is the tex coord register, src1 is texunit, src2 is textype
182 nDepInstID = pInstDeps[i].nSrcDeps[0];
183 if(nDepInstID >= 0)
184 {
185 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
186 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
187 {
188 pInstDeps[nDepInstID].nDstDep = i;
189 pInstDeps[i].nDstDep = i;
190 }
191 else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
192 {
193 pInstDeps[i].nDstDep = i;
194 }
195 else
196 { //... other deps?
197 }
198 }
199 }
200 }
201
202 return GL_TRUE;
203 }
204
205 GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
206 struct gl_fragment_program *mesa_fp)
207 {
208 GLuint number_of_colors_exported;
209 GLboolean z_enabled = GL_FALSE;
210 GLuint unBit;
211
212 //Init_Program
213 Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
214 Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp);
215
216 if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
217 {
218 return GL_FALSE;
219 }
220
221 if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions,
222 &(mesa_fp->Base.Instructions[0]),
223 &(fp->r700AsmCode)) )
224 {
225 return GL_FALSE;
226 }
227
228 if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
229 {
230 return GL_FALSE;
231 }
232
233 fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0
234 : (fp->r700AsmCode.number_used_registers - 1);
235
236 fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
237
238 number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;
239
240 unBit = 1 << FRAG_RESULT_DEPTH;
241 if(mesa_fp->Base.OutputsWritten & unBit)
242 {
243 z_enabled = GL_TRUE;
244 number_of_colors_exported--;
245 }
246
247 fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
248
249 fp->translated = GL_TRUE;
250
251 return GL_TRUE;
252 }
253
254 void * r700GetActiveFpShaderBo(GLcontext * ctx)
255 {
256 struct r700_fragment_program *fp = (struct r700_fragment_program *)
257 (ctx->FragmentProgram._Current);
258
259 return fp->shaderbo;
260 }
261
262 GLboolean r700SetupFragmentProgram(GLcontext * ctx)
263 {
264 context_t *context = R700_CONTEXT(ctx);
265 BATCH_LOCALS(&context->radeon);
266 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
267
268 struct r700_fragment_program *fp = (struct r700_fragment_program *)
269 (ctx->FragmentProgram._Current);
270 r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
271 struct gl_fragment_program *mesa_fp = &(fp->mesa_program);
272 struct gl_program_parameter_list *paramList;
273 unsigned int unNumParamData;
274 unsigned int ui, i;
275 unsigned int unNumOfReg;
276 unsigned int unBit;
277
278 if(GL_FALSE == fp->loaded)
279 {
280 if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
281 {
282 Assemble( &(fp->r700Shader) );
283 }
284
285 /* Load fp to gpu */
286 r600EmitShader(ctx,
287 &(fp->shaderbo),
288 (GLvoid *)(fp->r700Shader.pProgram),
289 fp->r700Shader.uShaderBinaryDWORDSize,
290 "FS");
291
292 fp->loaded = GL_TRUE;
293 }
294
295 DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram),
296 fp->r700Shader.uShaderBinaryDWORDSize);
297
298 /* TODO : enable this after MemUse fixed *=
299 (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
300 */
301
302 r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */
303
304 unNumOfReg = fp->r700Shader.nRegs + 1;
305
306 ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
307
308 ui = (unNumOfReg < ui) ? ui : unNumOfReg;
309
310 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
311
312 CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
313
314 if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
315 {
316 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
317 STACK_SIZE_shift, STACK_SIZE_mask);
318 }
319
320 SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
321 EXPORT_MODE_shift, EXPORT_MODE_mask);
322
323 if(fp->r700Shader.killIsUsed)
324 {
325 SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
326 }
327 else
328 {
329 CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
330 }
331
332 if(fp->r700Shader.depthIsExported)
333 {
334 SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
335 }
336 else
337 {
338 CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
339 }
340
341 /* sent out shader constants. */
342
343 paramList = fp->mesa_program.Base.Parameters;
344
345 if(NULL != paramList)
346 {
347 _mesa_load_state_parameters(ctx, paramList);
348
349 unNumParamData = paramList->NumParameters * 4;
350
351 BEGIN_BATCH_NO_AUTOSTATE(2 + unNumParamData);
352
353 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, unNumParamData));
354
355 /* assembler map const from very beginning. */
356 R600_OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4);
357
358 unNumParamData = paramList->NumParameters;
359
360 for(ui=0; ui<unNumParamData; ui++)
361 {
362 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][0])));
363 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][1])));
364 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][2])));
365 R600_OUT_BATCH(*((unsigned int*)&(paramList->ParameterValues[ui][3])));
366 }
367 END_BATCH();
368 COMMIT_BATCH();
369 }
370
371 // emit ps input map
372 unBit = 1 << FRAG_ATTRIB_COL0;
373 if(mesa_fp->Base.InputsRead & unBit)
374 {
375 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
376 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
377 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
378 SEMANTIC_shift, SEMANTIC_mask);
379 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
380 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
381 else
382 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
383 }
384
385 unBit = 1 << FRAG_ATTRIB_COL1;
386 if(mesa_fp->Base.InputsRead & unBit)
387 {
388 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
389 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
390 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
391 SEMANTIC_shift, SEMANTIC_mask);
392 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
393 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
394 else
395 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
396 }
397
398 unBit = 1 << FRAG_ATTRIB_FOGC;
399 if(mesa_fp->Base.InputsRead & unBit)
400 {
401 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
402 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
403 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
404 SEMANTIC_shift, SEMANTIC_mask);
405 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
406 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
407 else
408 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
409 }
410
411 for(i=0; i<8; i++)
412 {
413 unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
414 if(mesa_fp->Base.InputsRead & unBit)
415 {
416 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
417 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
418 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
419 SEMANTIC_shift, SEMANTIC_mask);
420 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
421 }
422 }
423
424 return GL_TRUE;
425 }
426
427
428