Merge branch 'glsl-to-tgsi'
[mesa.git] / src / mesa / drivers / dri / r600 / r700_fragprog.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_statevars.h"
37 #include "program/program.h"
38
39 #include "r600_context.h"
40 #include "r600_cmdbuf.h"
41 #include "r600_emit.h"
42
43 #include "r700_fragprog.h"
44
45 #include "r700_debug.h"
46
47 void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog)
48 {
49 static const gl_state_index winstate[STATE_LENGTH]
50 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0};
51 struct prog_instruction *newInst, *inst;
52 GLint win_size; /* state reference */
53 GLuint wpos_temp; /* temp register */
54 int i, j;
55
56 /* PARAM win_size = STATE_FB_WPOS_Y_TRANSFORM */
57 win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
58
59 wpos_temp = fprog->Base.NumTemporaries++;
60
61 /* scan program where WPOS is used and replace with wpos_temp */
62 inst = fprog->Base.Instructions;
63 for (i = 0; i < fprog->Base.NumInstructions; i++) {
64 for (j=0; j < 3; j++) {
65 if(inst->SrcReg[j].File == PROGRAM_INPUT &&
66 inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
67 inst->SrcReg[j].File = PROGRAM_TEMPORARY;
68 inst->SrcReg[j].Index = wpos_temp;
69 }
70 }
71 inst++;
72 }
73
74 _mesa_insert_instructions(&(fprog->Base), 0, 1);
75
76 newInst = fprog->Base.Instructions;
77 /* possibly invert wpos.y depending on STATE_FB_WPOS_Y_TRANSFORM var */
78 newInst[0].Opcode = OPCODE_MAD;
79 newInst[0].DstReg.File = PROGRAM_TEMPORARY;
80 newInst[0].DstReg.Index = wpos_temp;
81 newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
82
83 newInst[0].SrcReg[0].File = PROGRAM_INPUT;
84 newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
85 newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
86
87 newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
88 newInst[0].SrcReg[1].Index = win_size;
89 newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE);
90
91 newInst[0].SrcReg[2].File = PROGRAM_STATE_VAR;
92 newInst[0].SrcReg[2].Index = win_size;
93 newInst[0].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
94
95 }
96
97 //TODO : Validate FP input with VP output.
98 void Map_Fragment_Program(r700_AssemblerBase *pAsm,
99 struct gl_fragment_program *mesa_fp,
100 struct gl_context *ctx)
101 {
102 unsigned int unBit;
103 unsigned int i;
104
105 /* match fp inputs with vp exports. */
106 struct r700_vertex_program_cont *vpc =
107 (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
108 GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
109
110 pAsm->number_used_registers = 0;
111
112 //Input mapping : mesa_fp->Base.InputsRead set the flag, set in
113 //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
114 //MUST match order in Map_Vertex_Output
115 unBit = 1 << FRAG_ATTRIB_WPOS;
116 if(mesa_fp->Base.InputsRead & unBit)
117 {
118 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++;
119 }
120
121 unBit = 1 << VERT_RESULT_COL0;
122 if(OutputsWritten & unBit)
123 {
124 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
125 }
126
127 unBit = 1 << VERT_RESULT_COL1;
128 if(OutputsWritten & unBit)
129 {
130 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
131 }
132
133 unBit = 1 << VERT_RESULT_FOGC;
134 if(OutputsWritten & unBit)
135 {
136 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
137 }
138
139 for(i=0; i<8; i++)
140 {
141 unBit = 1 << (VERT_RESULT_TEX0 + i);
142 if(OutputsWritten & unBit)
143 {
144 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
145 }
146 }
147
148 /* order has been taken care of */
149 #if 1
150 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
151 {
152 unBit = 1 << i;
153 if(OutputsWritten & unBit)
154 {
155 pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++;
156 }
157 }
158 #else
159 if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 )
160 {
161 struct r700_vertex_program_cont *vpc =
162 (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
163 struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying;
164 struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying;
165 struct gl_program_parameter * pVsParam;
166 struct gl_program_parameter * pPsParam;
167 GLuint j, k;
168 GLuint unMaxVarying = 0;
169
170 for(i=0; i<VsVarying->NumParameters; i++)
171 {
172 pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0;
173 }
174
175 for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++)
176 {
177 unBit = 1 << i;
178 if(mesa_fp->Base.InputsRead & unBit)
179 {
180 j = i - FRAG_ATTRIB_VAR0;
181 pPsParam = PsVarying->Parameters + j;
182
183 for(k=0; k<VsVarying->NumParameters; k++)
184 {
185 pVsParam = VsVarying->Parameters + k;
186
187 if( strcmp(pPsParam->Name, pVsParam->Name) == 0)
188 {
189 pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k;
190 if(k > unMaxVarying)
191 {
192 unMaxVarying = k;
193 }
194 break;
195 }
196 }
197 }
198 }
199
200 pAsm->number_used_registers += unMaxVarying + 1;
201 }
202 #endif
203 unBit = 1 << FRAG_ATTRIB_FACE;
204 if(mesa_fp->Base.InputsRead & unBit)
205 {
206 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++;
207 }
208
209 unBit = 1 << FRAG_ATTRIB_PNTC;
210 if(mesa_fp->Base.InputsRead & unBit)
211 {
212 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++;
213 }
214
215 /* Map temporary registers (GPRs) */
216 pAsm->starting_temp_register_number = pAsm->number_used_registers;
217
218 if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
219 {
220 pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
221 }
222 else
223 {
224 pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
225 }
226
227 /* Output mapping */
228 pAsm->number_of_exports = 0;
229 pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
230 pAsm->starting_export_register_number = pAsm->number_used_registers;
231
232 for (i = 0; i < FRAG_RESULT_MAX; ++i)
233 {
234 unBit = 1 << i;
235 if (mesa_fp->Base.OutputsWritten & unBit)
236 {
237 if (i == FRAG_RESULT_DEPTH)
238 {
239 pAsm->depth_export_register_number = pAsm->number_used_registers;
240 pAsm->pR700Shader->depthIsExported = 1;
241 }
242
243 pAsm->uiFP_OutputMap[i] = pAsm->number_used_registers++;
244 ++pAsm->number_of_exports;
245 ++pAsm->number_of_colorandz_exports;
246 }
247 }
248
249 pAsm->flag_reg_index = pAsm->number_used_registers++;
250
251 pAsm->uFirstHelpReg = pAsm->number_used_registers;
252 }
253
254 GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
255 struct gl_fragment_program *mesa_fp)
256 {
257 GLuint i, j;
258 GLint * puiTEMPwrites;
259 GLint * puiTEMPreads;
260 struct prog_instruction * pILInst;
261 InstDeps *pInstDeps;
262 struct prog_instruction * texcoord_DepInst;
263 GLint nDepInstID;
264
265 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
266 puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
267
268 for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
269 {
270 puiTEMPwrites[i] = -1;
271 puiTEMPreads[i] = -1;
272 }
273
274 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
275
276 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
277 {
278 pInstDeps[i].nDstDep = -1;
279 pILInst = &(mesa_fp->Base.Instructions[i]);
280
281 //Dst
282 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
283 {
284 //Set lastwrite for the temp
285 puiTEMPwrites[pILInst->DstReg.Index] = i;
286 }
287
288 //Src
289 for(j=0; j<3; j++)
290 {
291 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
292 {
293 //Set dep.
294 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
295 //Set first read
296 if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 )
297 {
298 puiTEMPreads[pILInst->SrcReg[j].Index] = i;
299 }
300 }
301 else
302 {
303 pInstDeps[i].nSrcDeps[j] = -1;
304 }
305 }
306 }
307
308 fp->r700AsmCode.pInstDeps = pInstDeps;
309
310 //Find dep for tex inst
311 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
312 {
313 pILInst = &(mesa_fp->Base.Instructions[i]);
314
315 if(GL_TRUE == IsTex(pILInst->Opcode))
316 { //src0 is the tex coord register, src1 is texunit, src2 is textype
317 nDepInstID = pInstDeps[i].nSrcDeps[0];
318 if(nDepInstID >= 0)
319 {
320 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
321 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
322 {
323 pInstDeps[nDepInstID].nDstDep = i;
324 pInstDeps[i].nDstDep = i;
325 }
326 else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
327 {
328 pInstDeps[i].nDstDep = i;
329 }
330 else
331 { //... other deps?
332 }
333 }
334 // make sure that we dont overwrite src used earlier
335 nDepInstID = puiTEMPreads[pILInst->DstReg.Index];
336 if(nDepInstID < i)
337 {
338 pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index];
339 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
340 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
341 {
342 pInstDeps[nDepInstID].nDstDep = i;
343 }
344
345 }
346
347 }
348 }
349
350 FREE(puiTEMPwrites);
351 FREE(puiTEMPreads);
352
353 return GL_TRUE;
354 }
355
356 GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
357 struct gl_fragment_program *mesa_fp,
358 struct gl_context *ctx)
359 {
360 context_t *context = R700_CONTEXT(ctx);
361 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
362
363 GLuint number_of_colors_exported;
364 GLboolean z_enabled = GL_FALSE;
365 GLuint unBit, shadow_unit;
366 int i;
367 struct prog_instruction *inst;
368 gl_state_index shadow_ambient[STATE_LENGTH]
369 = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0};
370
371 //Init_Program
372 Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
373
374 if(GL_TRUE == r700->bShaderUseMemConstant)
375 {
376 fp->r700AsmCode.bUseMemConstant = GL_TRUE;
377 }
378 else
379 {
380 fp->r700AsmCode.bUseMemConstant = GL_FALSE;
381 }
382
383 fp->r700AsmCode.unAsic = 7;
384
385 if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
386 {
387 insert_wpos_code(ctx, mesa_fp);
388 }
389
390 /* add/map consts for ARB_shadow_ambient */
391 if(mesa_fp->Base.ShadowSamplers)
392 {
393 inst = mesa_fp->Base.Instructions;
394 for (i = 0; i < mesa_fp->Base.NumInstructions; i++)
395 {
396 if(inst->TexShadow == 1)
397 {
398 shadow_unit = inst->TexSrcUnit;
399 shadow_ambient[2] = shadow_unit;
400 fp->r700AsmCode.shadow_regs[shadow_unit] =
401 _mesa_add_state_reference(mesa_fp->Base.Parameters, shadow_ambient);
402 }
403 inst++;
404 }
405 }
406
407 Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx);
408
409 if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
410 {
411 return GL_FALSE;
412 }
413
414 InitShaderProgram(&(fp->r700AsmCode));
415
416 for(i=0; i < MAX_SAMPLERS; i++)
417 {
418 fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i];
419 }
420
421 fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions;
422
423 if( GL_FALSE == AssembleInstr(0,
424 0,
425 mesa_fp->Base.NumInstructions,
426 &(mesa_fp->Base.Instructions[0]),
427 &(fp->r700AsmCode)) )
428 {
429 return GL_FALSE;
430 }
431
432 if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
433 {
434 return GL_FALSE;
435 }
436
437 if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) )
438 {
439 return GL_FALSE;
440 }
441
442 fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0
443 : (fp->r700AsmCode.number_used_registers - 1);
444
445 fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
446
447 number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;
448
449 unBit = 1 << FRAG_RESULT_DEPTH;
450 if(mesa_fp->Base.OutputsWritten & unBit)
451 {
452 z_enabled = GL_TRUE;
453 number_of_colors_exported--;
454 }
455
456 /* illegal to set this to 0 */
457 if(number_of_colors_exported || z_enabled)
458 {
459 fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
460 }
461 else
462 {
463 fp->r700Shader.exportMode = (1 << 1);
464 }
465
466 fp->translated = GL_TRUE;
467
468 return GL_TRUE;
469 }
470
471 void r700SelectFragmentShader(struct gl_context *ctx)
472 {
473 context_t *context = R700_CONTEXT(ctx);
474 struct r700_fragment_program *fp = (struct r700_fragment_program *)
475 (ctx->FragmentProgram._Current);
476 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
477 {
478 fp->r700AsmCode.bR6xx = 1;
479 }
480
481 if (GL_FALSE == fp->translated)
482 r700TranslateFragmentShader(fp, &(fp->mesa_program), ctx);
483 }
484
485 void * r700GetActiveFpShaderBo(struct gl_context * ctx)
486 {
487 struct r700_fragment_program *fp = (struct r700_fragment_program *)
488 (ctx->FragmentProgram._Current);
489
490 return fp->shaderbo;
491 }
492
493 void * r700GetActiveFpShaderConstBo(struct gl_context * ctx)
494 {
495 struct r700_fragment_program *fp = (struct r700_fragment_program *)
496 (ctx->FragmentProgram._Current);
497
498 return fp->constbo0;
499 }
500
501 GLboolean r700SetupFragmentProgram(struct gl_context * ctx)
502 {
503 context_t *context = R700_CONTEXT(ctx);
504 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
505 struct r700_fragment_program *fp = (struct r700_fragment_program *)
506 (ctx->FragmentProgram._Current);
507 r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
508 struct gl_fragment_program *mesa_fp = &(fp->mesa_program);
509 struct gl_program_parameter_list *paramList;
510 unsigned int unNumParamData;
511 unsigned int ui, i;
512 unsigned int unNumOfReg;
513 unsigned int unBit;
514 unsigned int num_sq_ps_gprs;
515 GLuint exportCount;
516 GLboolean point_sprite = GL_FALSE;
517
518 if(GL_FALSE == fp->loaded)
519 {
520 if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
521 {
522 Assemble( &(fp->r700Shader) );
523 }
524
525 /* Load fp to gpu */
526 r600EmitShader(ctx,
527 &(fp->shaderbo),
528 (GLvoid *)(fp->r700Shader.pProgram),
529 fp->r700Shader.uShaderBinaryDWORDSize,
530 "FS");
531
532 fp->loaded = GL_TRUE;
533 }
534
535 DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram),
536 fp->r700Shader.uShaderBinaryDWORDSize);
537
538 /* TODO : enable this after MemUse fixed *=
539 (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
540 */
541
542 R600_STATECHANGE(context, ps);
543
544 r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0;
545 SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
546
547 r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */
548
549 R600_STATECHANGE(context, spi);
550
551 unNumOfReg = fp->r700Shader.nRegs + 1;
552
553 ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
554
555 /* PS uses fragment.position */
556 if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
557 {
558 ui += 1;
559 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
560 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask);
561 SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
562 SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
563 }
564 else
565 {
566 CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
567 CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
568 }
569
570 if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE))
571 {
572 ui += 1;
573 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
574 SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
575 SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit);
576 SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask);
577 }
578 else
579 {
580 CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
581 }
582
583 /* see if we need any point_sprite replacements, also increase num_interp
584 * as there's no vp output for them */
585 if (ctx->Point.PointSprite)
586 {
587 for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++)
588 {
589 if (ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE)
590 {
591 ui++;
592 point_sprite = GL_TRUE;
593 }
594 }
595 }
596
597 if( mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC))
598 ui++;
599
600 if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite)
601 {
602 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
603 SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
604 SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask);
605 SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
606 SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask);
607 SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask);
608 /* Like e.g. viewport and winding, point sprite coordinates are
609 * inverted when rendering to FBO. */
610 if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == !ctx->DrawBuffer->Name)
611 SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
612 else
613 CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
614 }
615 else
616 {
617 CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
618 }
619
620
621 ui = (unNumOfReg < ui) ? ui : unNumOfReg;
622
623 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
624
625 num_sq_ps_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_PS_GPRS_mask) >> NUM_PS_GPRS_shift);
626
627 if(ui > num_sq_ps_gprs)
628 {
629 /* care! thich changes sq - needs idle state */
630 R600_STATECHANGE(context, sq);
631 SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, ui, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
632 }
633
634 CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
635
636 if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
637 {
638 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
639 STACK_SIZE_shift, STACK_SIZE_mask);
640 }
641
642 SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
643 EXPORT_MODE_shift, EXPORT_MODE_mask);
644
645 // emit ps input map
646 struct r700_vertex_program_cont *vpc =
647 (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
648 GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
649
650 for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
651 r700->SPI_PS_INPUT_CNTL[ui].u32All = 0;
652
653 unBit = 1 << FRAG_ATTRIB_WPOS;
654 if(mesa_fp->Base.InputsRead & unBit)
655 {
656 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS];
657 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
658 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
659 SEMANTIC_shift, SEMANTIC_mask);
660 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
661 }
662
663 unBit = 1 << VERT_RESULT_COL0;
664 if(OutputsWritten & unBit)
665 {
666 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
667 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
668 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
669 SEMANTIC_shift, SEMANTIC_mask);
670 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
671 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
672 else
673 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
674 }
675
676 unBit = 1 << VERT_RESULT_COL1;
677 if(OutputsWritten & unBit)
678 {
679 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
680 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
681 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
682 SEMANTIC_shift, SEMANTIC_mask);
683 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
684 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
685 else
686 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
687 }
688
689 unBit = 1 << VERT_RESULT_FOGC;
690 if(OutputsWritten & unBit)
691 {
692 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
693 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
694 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
695 SEMANTIC_shift, SEMANTIC_mask);
696 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
697 }
698
699 for(i=0; i<8; i++)
700 {
701 GLboolean coord_replace = ctx->Point.PointSprite && ctx->Point.CoordReplace[i];
702 unBit = 1 << (VERT_RESULT_TEX0 + i);
703 if ((OutputsWritten & unBit) || coord_replace)
704 {
705 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
706 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
707 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
708 SEMANTIC_shift, SEMANTIC_mask);
709 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
710 /* ARB_point_sprite */
711 if (coord_replace)
712 {
713 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
714 }
715 }
716 }
717
718 unBit = 1 << FRAG_ATTRIB_FACE;
719 if(mesa_fp->Base.InputsRead & unBit)
720 {
721 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE];
722 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
723 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
724 SEMANTIC_shift, SEMANTIC_mask);
725 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
726 }
727 unBit = 1 << FRAG_ATTRIB_PNTC;
728 if(mesa_fp->Base.InputsRead & unBit)
729 {
730 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC];
731 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
732 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
733 SEMANTIC_shift, SEMANTIC_mask);
734 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
735 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
736 else
737 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
738 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
739 }
740
741
742
743
744 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
745 {
746 unBit = 1 << i;
747 if(OutputsWritten & unBit)
748 {
749 ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0];
750 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
751 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
752 SEMANTIC_shift, SEMANTIC_mask);
753 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
754 }
755 }
756
757 exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
758 if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1))
759 {
760 R600_STATECHANGE(context, cb);
761 r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
762 }
763
764 /* sent out shader constants. */
765 paramList = fp->mesa_program.Base.Parameters;
766
767 if(NULL != paramList)
768 {
769 _mesa_load_state_parameters(ctx, paramList);
770
771 if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
772 return GL_FALSE;
773
774 R600_STATECHANGE(context, ps_consts);
775
776 r700->ps.num_consts = paramList->NumParameters;
777
778 unNumParamData = paramList->NumParameters;
779
780 for(ui=0; ui<unNumParamData; ui++) {
781 r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
782 r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
783 r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
784 r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
785 }
786
787 /* Load fp constants to gpu */
788 if( (GL_TRUE == r700->bShaderUseMemConstant) && (unNumParamData > 0) )
789 {
790 r600EmitShader(ctx,
791 &(fp->constbo0),
792 (GLvoid *)&(paramList->ParameterValues[0][0]),
793 unNumParamData * 4,
794 "FS Const");
795 }
796
797 } else
798 r700->ps.num_consts = 0;
799
800 COMPILED_SUB * pCompiledSub;
801 GLuint uj;
802 GLuint unConstOffset = r700->ps.num_consts;
803 for(ui=0; ui<pAsm->unNumPresub; ui++)
804 {
805 pCompiledSub = pAsm->presubs[ui].pCompiledSub;
806
807 r700->ps.num_consts += pCompiledSub->NumParameters;
808
809 for(uj=0; uj<pCompiledSub->NumParameters; uj++)
810 {
811 r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
812 r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
813 r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
814 r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
815 }
816 unConstOffset += pCompiledSub->NumParameters;
817 }
818
819 return GL_TRUE;
820 }
821