Merge remote branch 'origin/master' into nv50-compiler
[mesa.git] / src / mesa / drivers / dri / r600 / r700_fragprog.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_statevars.h"
37 #include "program/program.h"
38
39 #include "r600_context.h"
40 #include "r600_cmdbuf.h"
41 #include "r600_emit.h"
42
43 #include "r700_fragprog.h"
44
45 #include "r700_debug.h"
46
47 void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog)
48 {
49 static const gl_state_index winstate[STATE_LENGTH]
50 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
51 struct prog_instruction *newInst, *inst;
52 GLint win_size; /* state reference */
53 GLuint wpos_temp; /* temp register */
54 int i, j;
55
56 /* PARAM win_size = STATE_FB_SIZE */
57 win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
58
59 wpos_temp = fprog->Base.NumTemporaries++;
60
61 /* scan program where WPOS is used and replace with wpos_temp */
62 inst = fprog->Base.Instructions;
63 for (i = 0; i < fprog->Base.NumInstructions; i++) {
64 for (j=0; j < 3; j++) {
65 if(inst->SrcReg[j].File == PROGRAM_INPUT &&
66 inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
67 inst->SrcReg[j].File = PROGRAM_TEMPORARY;
68 inst->SrcReg[j].Index = wpos_temp;
69 }
70 }
71 inst++;
72 }
73
74 _mesa_insert_instructions(&(fprog->Base), 0, 1);
75
76 newInst = fprog->Base.Instructions;
77 /* invert wpos.y
78 * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
79 newInst[0].Opcode = OPCODE_ADD;
80 newInst[0].DstReg.File = PROGRAM_TEMPORARY;
81 newInst[0].DstReg.Index = wpos_temp;
82 newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
83
84 newInst[0].SrcReg[0].File = PROGRAM_INPUT;
85 newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
86 newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
87 newInst[0].SrcReg[0].Negate = NEGATE_Y;
88
89 newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
90 newInst[0].SrcReg[1].Index = win_size;
91 newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
92
93 }
94
95 //TODO : Validate FP input with VP output.
96 void Map_Fragment_Program(r700_AssemblerBase *pAsm,
97 struct gl_fragment_program *mesa_fp,
98 GLcontext *ctx)
99 {
100 unsigned int unBit;
101 unsigned int i;
102 GLuint ui;
103
104 /* match fp inputs with vp exports. */
105 struct r700_vertex_program_cont *vpc =
106 (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
107 GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
108
109 pAsm->number_used_registers = 0;
110
111 //Input mapping : mesa_fp->Base.InputsRead set the flag, set in
112 //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
113 //MUST match order in Map_Vertex_Output
114 unBit = 1 << FRAG_ATTRIB_WPOS;
115 if(mesa_fp->Base.InputsRead & unBit)
116 {
117 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++;
118 }
119
120 unBit = 1 << VERT_RESULT_COL0;
121 if(OutputsWritten & unBit)
122 {
123 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
124 }
125
126 unBit = 1 << VERT_RESULT_COL1;
127 if(OutputsWritten & unBit)
128 {
129 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
130 }
131
132 unBit = 1 << VERT_RESULT_FOGC;
133 if(OutputsWritten & unBit)
134 {
135 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
136 }
137
138 for(i=0; i<8; i++)
139 {
140 unBit = 1 << (VERT_RESULT_TEX0 + i);
141 if(OutputsWritten & unBit)
142 {
143 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
144 }
145 }
146
147 /* order has been taken care of */
148 #if 1
149 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
150 {
151 unBit = 1 << i;
152 if(OutputsWritten & unBit)
153 {
154 pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++;
155 }
156 }
157 #else
158 if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 )
159 {
160 struct r700_vertex_program_cont *vpc =
161 (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
162 struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying;
163 struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying;
164 struct gl_program_parameter * pVsParam;
165 struct gl_program_parameter * pPsParam;
166 GLuint j, k;
167 GLuint unMaxVarying = 0;
168
169 for(i=0; i<VsVarying->NumParameters; i++)
170 {
171 pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0;
172 }
173
174 for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++)
175 {
176 unBit = 1 << i;
177 if(mesa_fp->Base.InputsRead & unBit)
178 {
179 j = i - FRAG_ATTRIB_VAR0;
180 pPsParam = PsVarying->Parameters + j;
181
182 for(k=0; k<VsVarying->NumParameters; k++)
183 {
184 pVsParam = VsVarying->Parameters + k;
185
186 if( strcmp(pPsParam->Name, pVsParam->Name) == 0)
187 {
188 pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k;
189 if(k > unMaxVarying)
190 {
191 unMaxVarying = k;
192 }
193 break;
194 }
195 }
196 }
197 }
198
199 pAsm->number_used_registers += unMaxVarying + 1;
200 }
201 #endif
202 unBit = 1 << FRAG_ATTRIB_FACE;
203 if(mesa_fp->Base.InputsRead & unBit)
204 {
205 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++;
206 }
207
208 unBit = 1 << FRAG_ATTRIB_PNTC;
209 if(mesa_fp->Base.InputsRead & unBit)
210 {
211 pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++;
212 }
213
214 /* Map temporary registers (GPRs) */
215 pAsm->starting_temp_register_number = pAsm->number_used_registers;
216
217 if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
218 {
219 pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
220 }
221 else
222 {
223 pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
224 }
225
226 /* Output mapping */
227 pAsm->number_of_exports = 0;
228 pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
229 pAsm->starting_export_register_number = pAsm->number_used_registers;
230
231 for (i = 0; i < FRAG_RESULT_MAX; ++i)
232 {
233 unBit = 1 << i;
234 if (mesa_fp->Base.OutputsWritten & unBit)
235 {
236 if (i == FRAG_RESULT_DEPTH)
237 {
238 pAsm->depth_export_register_number = pAsm->number_used_registers;
239 pAsm->pR700Shader->depthIsExported = 1;
240 }
241
242 pAsm->uiFP_OutputMap[i] = pAsm->number_used_registers++;
243 ++pAsm->number_of_exports;
244 ++pAsm->number_of_colorandz_exports;
245 }
246 }
247
248 pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
249 for(ui=0; ui<pAsm->number_of_exports; ui++)
250 {
251 pAsm->pucOutMask[ui] = 0x0;
252 }
253
254 pAsm->flag_reg_index = pAsm->number_used_registers++;
255
256 pAsm->uFirstHelpReg = pAsm->number_used_registers;
257 }
258
259 GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
260 struct gl_fragment_program *mesa_fp)
261 {
262 GLuint i, j;
263 GLint * puiTEMPwrites;
264 GLint * puiTEMPreads;
265 struct prog_instruction * pILInst;
266 InstDeps *pInstDeps;
267 struct prog_instruction * texcoord_DepInst;
268 GLint nDepInstID;
269
270 puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
271 puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
272
273 for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
274 {
275 puiTEMPwrites[i] = -1;
276 puiTEMPreads[i] = -1;
277 }
278
279 pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
280
281 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
282 {
283 pInstDeps[i].nDstDep = -1;
284 pILInst = &(mesa_fp->Base.Instructions[i]);
285
286 //Dst
287 if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
288 {
289 //Set lastwrite for the temp
290 puiTEMPwrites[pILInst->DstReg.Index] = i;
291 }
292
293 //Src
294 for(j=0; j<3; j++)
295 {
296 if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
297 {
298 //Set dep.
299 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
300 //Set first read
301 if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 )
302 {
303 puiTEMPreads[pILInst->SrcReg[j].Index] = i;
304 }
305 }
306 else
307 {
308 pInstDeps[i].nSrcDeps[j] = -1;
309 }
310 }
311 }
312
313 fp->r700AsmCode.pInstDeps = pInstDeps;
314
315 //Find dep for tex inst
316 for(i=0; i<mesa_fp->Base.NumInstructions; i++)
317 {
318 pILInst = &(mesa_fp->Base.Instructions[i]);
319
320 if(GL_TRUE == IsTex(pILInst->Opcode))
321 { //src0 is the tex coord register, src1 is texunit, src2 is textype
322 nDepInstID = pInstDeps[i].nSrcDeps[0];
323 if(nDepInstID >= 0)
324 {
325 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
326 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
327 {
328 pInstDeps[nDepInstID].nDstDep = i;
329 pInstDeps[i].nDstDep = i;
330 }
331 else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
332 {
333 pInstDeps[i].nDstDep = i;
334 }
335 else
336 { //... other deps?
337 }
338 }
339 // make sure that we dont overwrite src used earlier
340 nDepInstID = puiTEMPreads[pILInst->DstReg.Index];
341 if(nDepInstID < i)
342 {
343 pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index];
344 texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
345 if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
346 {
347 pInstDeps[nDepInstID].nDstDep = i;
348 }
349
350 }
351
352 }
353 }
354
355 FREE(puiTEMPwrites);
356 FREE(puiTEMPreads);
357
358 return GL_TRUE;
359 }
360
361 GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
362 struct gl_fragment_program *mesa_fp,
363 GLcontext *ctx)
364 {
365 context_t *context = R700_CONTEXT(ctx);
366 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
367
368 GLuint number_of_colors_exported;
369 GLboolean z_enabled = GL_FALSE;
370 GLuint unBit, shadow_unit;
371 int i;
372 struct prog_instruction *inst;
373 gl_state_index shadow_ambient[STATE_LENGTH]
374 = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0};
375
376 //Init_Program
377 Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
378
379 if(GL_TRUE == r700->bShaderUseMemConstant)
380 {
381 fp->r700AsmCode.bUseMemConstant = GL_TRUE;
382 }
383 else
384 {
385 fp->r700AsmCode.bUseMemConstant = GL_FALSE;
386 }
387
388 fp->r700AsmCode.unAsic = 7;
389
390 if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
391 {
392 insert_wpos_code(ctx, mesa_fp);
393 }
394
395 /* add/map consts for ARB_shadow_ambient */
396 if(mesa_fp->Base.ShadowSamplers)
397 {
398 inst = mesa_fp->Base.Instructions;
399 for (i = 0; i < mesa_fp->Base.NumInstructions; i++)
400 {
401 if(inst->TexShadow == 1)
402 {
403 shadow_unit = inst->TexSrcUnit;
404 shadow_ambient[2] = shadow_unit;
405 fp->r700AsmCode.shadow_regs[shadow_unit] =
406 _mesa_add_state_reference(mesa_fp->Base.Parameters, shadow_ambient);
407 }
408 inst++;
409 }
410 }
411
412 Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx);
413
414 if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
415 {
416 return GL_FALSE;
417 }
418
419 InitShaderProgram(&(fp->r700AsmCode));
420
421 for(i=0; i < MAX_SAMPLERS; i++)
422 {
423 fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i];
424 }
425
426 fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions;
427
428 if( GL_FALSE == AssembleInstr(0,
429 0,
430 mesa_fp->Base.NumInstructions,
431 &(mesa_fp->Base.Instructions[0]),
432 &(fp->r700AsmCode)) )
433 {
434 return GL_FALSE;
435 }
436
437 if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
438 {
439 return GL_FALSE;
440 }
441
442 if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) )
443 {
444 return GL_FALSE;
445 }
446
447 fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0
448 : (fp->r700AsmCode.number_used_registers - 1);
449
450 fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
451
452 number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;
453
454 unBit = 1 << FRAG_RESULT_DEPTH;
455 if(mesa_fp->Base.OutputsWritten & unBit)
456 {
457 z_enabled = GL_TRUE;
458 number_of_colors_exported--;
459 }
460
461 /* illegal to set this to 0 */
462 if(number_of_colors_exported || z_enabled)
463 {
464 fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
465 }
466 else
467 {
468 fp->r700Shader.exportMode = (1 << 1);
469 }
470
471 fp->translated = GL_TRUE;
472
473 return GL_TRUE;
474 }
475
476 void r700SelectFragmentShader(GLcontext *ctx)
477 {
478 context_t *context = R700_CONTEXT(ctx);
479 struct r700_fragment_program *fp = (struct r700_fragment_program *)
480 (ctx->FragmentProgram._Current);
481 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
482 {
483 fp->r700AsmCode.bR6xx = 1;
484 }
485
486 if (GL_FALSE == fp->translated)
487 r700TranslateFragmentShader(fp, &(fp->mesa_program), ctx);
488 }
489
490 void * r700GetActiveFpShaderBo(GLcontext * ctx)
491 {
492 struct r700_fragment_program *fp = (struct r700_fragment_program *)
493 (ctx->FragmentProgram._Current);
494
495 return fp->shaderbo;
496 }
497
498 void * r700GetActiveFpShaderConstBo(GLcontext * ctx)
499 {
500 struct r700_fragment_program *fp = (struct r700_fragment_program *)
501 (ctx->FragmentProgram._Current);
502
503 return fp->constbo0;
504 }
505
506 GLboolean r700SetupFragmentProgram(GLcontext * ctx)
507 {
508 context_t *context = R700_CONTEXT(ctx);
509 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
510 struct r700_fragment_program *fp = (struct r700_fragment_program *)
511 (ctx->FragmentProgram._Current);
512 r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
513 struct gl_fragment_program *mesa_fp = &(fp->mesa_program);
514 struct gl_program_parameter_list *paramList;
515 unsigned int unNumParamData;
516 unsigned int ui, i;
517 unsigned int unNumOfReg;
518 unsigned int unBit;
519 GLuint exportCount;
520 GLboolean point_sprite = GL_FALSE;
521
522 if(GL_FALSE == fp->loaded)
523 {
524 if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
525 {
526 Assemble( &(fp->r700Shader) );
527 }
528
529 /* Load fp to gpu */
530 r600EmitShader(ctx,
531 &(fp->shaderbo),
532 (GLvoid *)(fp->r700Shader.pProgram),
533 fp->r700Shader.uShaderBinaryDWORDSize,
534 "FS");
535
536 fp->loaded = GL_TRUE;
537 }
538
539 DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram),
540 fp->r700Shader.uShaderBinaryDWORDSize);
541
542 /* TODO : enable this after MemUse fixed *=
543 (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
544 */
545
546 R600_STATECHANGE(context, ps);
547
548 r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0;
549 SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
550
551 r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */
552
553 R600_STATECHANGE(context, spi);
554
555 unNumOfReg = fp->r700Shader.nRegs + 1;
556
557 ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
558
559 /* PS uses fragment.position */
560 if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
561 {
562 ui += 1;
563 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
564 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask);
565 SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
566 SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
567 }
568 else
569 {
570 CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
571 CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
572 }
573
574 if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE))
575 {
576 ui += 1;
577 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
578 SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
579 SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit);
580 SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask);
581 }
582 else
583 {
584 CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
585 }
586
587 /* see if we need any point_sprite replacements, also increase num_interp
588 * as there's no vp output for them */
589 if (ctx->Point.PointSprite)
590 {
591 for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++)
592 {
593 if (ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE)
594 {
595 ui++;
596 point_sprite = GL_TRUE;
597 }
598 }
599 }
600
601 if( mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC))
602 ui++;
603
604 if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite)
605 {
606 SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
607 SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
608 SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask);
609 SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
610 SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask);
611 SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask);
612 /* Like e.g. viewport and winding, point sprite coordinates are
613 * inverted when rendering to FBO. */
614 if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == !ctx->DrawBuffer->Name)
615 SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
616 else
617 CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
618 }
619 else
620 {
621 CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
622 }
623
624
625 ui = (unNumOfReg < ui) ? ui : unNumOfReg;
626
627 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
628
629 CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
630
631 if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
632 {
633 SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
634 STACK_SIZE_shift, STACK_SIZE_mask);
635 }
636
637 SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
638 EXPORT_MODE_shift, EXPORT_MODE_mask);
639
640 // emit ps input map
641 struct r700_vertex_program_cont *vpc =
642 (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
643 GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
644
645 for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
646 r700->SPI_PS_INPUT_CNTL[ui].u32All = 0;
647
648 unBit = 1 << FRAG_ATTRIB_WPOS;
649 if(mesa_fp->Base.InputsRead & unBit)
650 {
651 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS];
652 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
653 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
654 SEMANTIC_shift, SEMANTIC_mask);
655 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
656 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
657 else
658 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
659 }
660
661 unBit = 1 << VERT_RESULT_COL0;
662 if(OutputsWritten & unBit)
663 {
664 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
665 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
666 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
667 SEMANTIC_shift, SEMANTIC_mask);
668 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
669 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
670 else
671 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
672 }
673
674 unBit = 1 << VERT_RESULT_COL1;
675 if(OutputsWritten & unBit)
676 {
677 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
678 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
679 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
680 SEMANTIC_shift, SEMANTIC_mask);
681 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
682 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
683 else
684 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
685 }
686
687 unBit = 1 << VERT_RESULT_FOGC;
688 if(OutputsWritten & unBit)
689 {
690 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
691 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
692 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
693 SEMANTIC_shift, SEMANTIC_mask);
694 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
695 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
696 else
697 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
698 }
699
700 for(i=0; i<8; i++)
701 {
702 GLboolean coord_replace = ctx->Point.PointSprite && ctx->Point.CoordReplace[i];
703 unBit = 1 << (VERT_RESULT_TEX0 + i);
704 if ((OutputsWritten & unBit) || coord_replace)
705 {
706 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
707 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
708 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
709 SEMANTIC_shift, SEMANTIC_mask);
710 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
711 /* ARB_point_sprite */
712 if (coord_replace)
713 {
714 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
715 }
716 }
717 }
718
719 unBit = 1 << FRAG_ATTRIB_FACE;
720 if(mesa_fp->Base.InputsRead & unBit)
721 {
722 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE];
723 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
724 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
725 SEMANTIC_shift, SEMANTIC_mask);
726 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
727 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
728 else
729 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
730 }
731 unBit = 1 << FRAG_ATTRIB_PNTC;
732 if(mesa_fp->Base.InputsRead & unBit)
733 {
734 ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC];
735 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
736 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
737 SEMANTIC_shift, SEMANTIC_mask);
738 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
739 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
740 else
741 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
742 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
743 }
744
745
746
747
748 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
749 {
750 unBit = 1 << i;
751 if(OutputsWritten & unBit)
752 {
753 ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0];
754 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
755 SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
756 SEMANTIC_shift, SEMANTIC_mask);
757 if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
758 SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
759 else
760 CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
761 }
762 }
763
764 exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
765 if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1))
766 {
767 R600_STATECHANGE(context, cb);
768 r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
769 }
770
771 /* sent out shader constants. */
772 paramList = fp->mesa_program.Base.Parameters;
773
774 if(NULL != paramList)
775 {
776 _mesa_load_state_parameters(ctx, paramList);
777
778 if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
779 return GL_FALSE;
780
781 R600_STATECHANGE(context, ps_consts);
782
783 r700->ps.num_consts = paramList->NumParameters;
784
785 unNumParamData = paramList->NumParameters;
786
787 for(ui=0; ui<unNumParamData; ui++) {
788 r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
789 r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
790 r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
791 r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
792 }
793
794 /* Load fp constants to gpu */
795 if( (GL_TRUE == r700->bShaderUseMemConstant) && (unNumParamData > 0) )
796 {
797 r600EmitShader(ctx,
798 &(fp->constbo0),
799 (GLvoid *)&(paramList->ParameterValues[0][0]),
800 unNumParamData * 4,
801 "FS Const");
802 }
803
804 } else
805 r700->ps.num_consts = 0;
806
807 COMPILED_SUB * pCompiledSub;
808 GLuint uj;
809 GLuint unConstOffset = r700->ps.num_consts;
810 for(ui=0; ui<pAsm->unNumPresub; ui++)
811 {
812 pCompiledSub = pAsm->presubs[ui].pCompiledSub;
813
814 r700->ps.num_consts += pCompiledSub->NumParameters;
815
816 for(uj=0; uj<pCompiledSub->NumParameters; uj++)
817 {
818 r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
819 r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
820 r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
821 r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
822 }
823 unConstOffset += pCompiledSub->NumParameters;
824 }
825
826 return GL_TRUE;
827 }
828