Fix r6 code bugs.
[mesa.git] / src / mesa / drivers / dri / r600 / r700_render.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include "main/glheader.h"
29 #include "main/state.h"
30 #include "main/imports.h"
31 #include "main/enums.h"
32 #include "main/macros.h"
33 #include "main/context.h"
34 #include "main/dd.h"
35 #include "main/simple_list.h"
36 #include "main/api_arrayelt.h"
37 #include "swrast/swrast.h"
38 #include "swrast_setup/swrast_setup.h"
39 #include "vbo/vbo.h"
40
41 #include "tnl/tnl.h"
42 #include "tnl/t_vp_build.h"
43 #include "tnl/t_context.h"
44 #include "tnl/t_vertex.h"
45 #include "tnl/t_pipeline.h"
46
47 #include "r600_context.h"
48 #include "r600_cmdbuf.h"
49
50 #include "r700_chip.h"
51 #include "r700_tex.h"
52
53 #include "r700_vertprog.h"
54 #include "r700_fragprog.h"
55 #include "r700_state.h"
56
57 void r700WaitForIdle(context_t *context)
58 {
59 BATCH_LOCALS(&context->radeon);
60 BEGIN_BATCH_NO_AUTOSTATE(3);
61
62 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
63 R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
64 R600_OUT_BATCH(1 << 15);
65
66 END_BATCH();
67 COMMIT_BATCH();
68 }
69
70 void r700WaitForIdleClean(context_t *context)
71 {
72 BATCH_LOCALS(&context->radeon);
73 BEGIN_BATCH_NO_AUTOSTATE(5);
74
75 R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
76 R600_OUT_BATCH(0x16);
77
78 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
79 R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
80 R600_OUT_BATCH(1 << 17);
81
82 END_BATCH();
83 COMMIT_BATCH();
84 }
85
86 static void r700Start3D(context_t *context)
87 {
88 BATCH_LOCALS(&context->radeon);
89 if (context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV670)
90 {
91 BEGIN_BATCH_NO_AUTOSTATE(2);
92 R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 1));
93 R600_OUT_BATCH(0);
94 END_BATCH();
95 }
96
97 BEGIN_BATCH_NO_AUTOSTATE(3);
98 R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
99 R600_OUT_BATCH(0x80000000);
100 R600_OUT_BATCH(0x80000000);
101 END_BATCH();
102
103 COMMIT_BATCH();
104
105 r700WaitForIdleClean(context);
106 }
107
108 static GLboolean r700SetupShaders(GLcontext * ctx)
109 {
110 context_t *context = R700_CONTEXT(ctx);
111
112 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
113
114 GLuint exportCount;
115
116 r700->SQ_PGM_RESOURCES_PS.u32All = 0;
117 r700->SQ_PGM_RESOURCES_VS.u32All = 0;
118
119 SETbit(r700->SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
120 SETbit(r700->SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
121
122 r700SetupVertexProgram(ctx);
123
124 r700SetupFragmentProgram(ctx);
125
126 exportCount = (r700->SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
127 r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
128
129 return GL_TRUE;
130 }
131
132 GLboolean r700SendTextureState(context_t *context)
133 {
134 unsigned int i;
135
136 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
137 #if 0 /* to be enabled */
138 for(i=0; i<R700_TEXTURE_NUMBERUNITS; i++)
139 {
140 if(r700->texture_states.textures[i] != 0)
141 {
142 R700_CMDBUF_CHECK_SPACE(9);
143 R700EP3 (context, IT_SET_RESOURCE, 7);
144 R700E32 (context, i * 7);
145 R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE0.u32All);
146 R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE1.u32All);
147 R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE2.u32All);
148 R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE3.u32All);
149 R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE4.u32All);
150 R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE5.u32All);
151 R700E32 (context, r700->texture_states.textures[i]->SQ_TEX_RESOURCE6.u32All);
152 }
153
154 if(r700->texture_states.samplers[i] != 0)
155 {
156 R700_CMDBUF_CHECK_SPACE(5);
157 R700EP3 (context, IT_SET_SAMPLER, 3);
158 R700E32 (context, i * 3); // Base at 0x7000
159 R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER0.u32All);
160 R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER1.u32All);
161 R700E32 (context, r700->texture_states.samplers[i]->SQ_TEX_SAMPLER2.u32All);
162 }
163 }
164 #endif
165 return GL_TRUE;
166 }
167
168 GLboolean r700SyncSurf(context_t *context)
169 {
170 #if 0 //to be enabled
171 BATCH_LOCALS(&context->radeon);
172
173 /* TODO : too heavy? */
174 unsigned int CP_COHER_CNTL = 0;
175
176 CP_COHER_CNTL |= TC_ACTION_ENA_bit
177 |VC_ACTION_ENA_bit
178 |CB_ACTION_ENA_bit
179 |DB_ACTION_ENA_bit
180 |SH_ACTION_ENA_bit
181 |SMX_ACTION_ENA_bit;
182
183
184 BEGIN_BATCH_NO_AUTOSTATE(5);
185
186 R600_OUT_BATCH(CP_PACKET3((IT_SURFACE_SYNC << 8), 3)));
187 R600_OUT_BATCH(CP_COHER_CNTL);
188 R600_OUT_BATCH(0xFFFFFFFF);
189 R600_OUT_BATCH(0x00000000);
190 R600_OUT_BATCH(10);
191
192 END_BATCH();
193 COMMIT_BATCH();
194 #endif
195 return GL_TRUE;
196 }
197
198 unsigned int r700PrimitiveType(int prim)
199 {
200 switch (prim & PRIM_MODE_MASK)
201 {
202 case GL_POINTS:
203 return DI_PT_POINTLIST;
204 break;
205 case GL_LINES:
206 return DI_PT_LINELIST;
207 break;
208 case GL_LINE_STRIP:
209 return DI_PT_LINESTRIP;
210 break;
211 case GL_LINE_LOOP:
212 return DI_PT_LINELOOP;
213 break;
214 case GL_TRIANGLES:
215 return DI_PT_TRILIST;
216 break;
217 case GL_TRIANGLE_STRIP:
218 return DI_PT_TRISTRIP;
219 break;
220 case GL_TRIANGLE_FAN:
221 return DI_PT_TRIFAN;
222 break;
223 case GL_QUADS:
224 return DI_PT_QUADLIST;
225 break;
226 case GL_QUAD_STRIP:
227 return DI_PT_QUADSTRIP;
228 break;
229 case GL_POLYGON:
230 return DI_PT_POLYGON;
231 break;
232 default:
233 assert(0);
234 return -1;
235 break;
236 }
237 }
238
239 static GLboolean r700RunRender(GLcontext * ctx,
240 struct tnl_pipeline_stage *stage)
241 {
242 context_t *context = R700_CONTEXT(ctx);
243 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj);
244 #if 1
245 BATCH_LOCALS(&context->radeon);
246
247 unsigned int i, j;
248 TNLcontext *tnl = TNL_CONTEXT(ctx);
249 struct vertex_buffer *vb = &tnl->vb;
250
251 struct r700_fragment_program *fp = (struct r700_fragment_program *)
252 (ctx->FragmentProgram._Current);
253 if (context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV670)
254 {
255 fp->r700AsmCode.bR6xx = 1;
256 }
257
258 r700Start3D(context); /* TODO : this is too much. */
259
260 r700SyncSurf(context); /* TODO : make it light. */
261
262 r700UpdateShaders(ctx);
263
264 r700SetRenderTarget(context);
265
266 if(r700SetupStreams(ctx))
267 {
268 return GL_TRUE;
269 }
270
271 r700UpdateTextureState(context);
272 r700SendTextureState(context);
273
274 if(GL_FALSE == fp->translated)
275 {
276 if( GL_FALSE == r700TranslateFragmentShader(fp, &(fp->mesa_program)) )
277 {
278 return GL_TRUE;
279 }
280 }
281
282 r700SetupShaders(ctx);
283
284 /* set a valid base address to make the command checker happy */
285 r700->SQ_PGM_START_FS.u32All = r700->SQ_PGM_START_PS.u32All;
286 r700->SQ_PGM_START_ES.u32All = r700->SQ_PGM_START_PS.u32All;
287 r700->SQ_PGM_START_GS.u32All = r700->SQ_PGM_START_PS.u32All;
288
289 r700SendContextStates(context, GL_FALSE);
290
291 /* richard test code */
292 for (i = 0; i < vb->PrimitiveCount; i++)
293 {
294 GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
295 GLuint start = vb->Primitive[i].start;
296 GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
297 GLuint numIndices = vb->Primitive[i].count;
298 GLuint numEntires;
299 //r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
300
301 unsigned int VGT_DRAW_INITIATOR = 0;
302 unsigned int VGT_INDEX_TYPE = 0;
303 unsigned int VGT_PRIMITIVE_TYPE = 0;
304 unsigned int VGT_NUM_INDICES = 0;
305
306 numEntires = 2 /* VGT_INDEX_TYPE */
307 + 3 /* VGT_PRIMITIVE_TYPE */
308 + numIndices + 3; /* DRAW_INDEX_IMMD */
309
310 BEGIN_BATCH_NO_AUTOSTATE(numEntires);
311
312 VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift;
313
314 R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
315 R600_OUT_BATCH(VGT_INDEX_TYPE);
316
317 VGT_NUM_INDICES = numIndices;
318
319 VGT_PRIMITIVE_TYPE |= r700PrimitiveType(prim) << VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift;
320 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
321 R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
322 R600_OUT_BATCH(VGT_PRIMITIVE_TYPE);
323
324 VGT_DRAW_INITIATOR |= DI_SRC_SEL_IMMEDIATE << SOURCE_SELECT_shift;
325 VGT_DRAW_INITIATOR |= DI_MAJOR_MODE_0 << MAJOR_MODE_shift;
326
327 R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (numIndices + 1)));
328 R600_OUT_BATCH(VGT_NUM_INDICES);
329 R600_OUT_BATCH(VGT_DRAW_INITIATOR);
330
331 for (j=0; j<numIndices; j++)
332 {
333 R600_OUT_BATCH(j);
334 }
335 END_BATCH();
336 COMMIT_BATCH();
337 }
338
339 /* Flush render op cached for last several quads. */
340 BEGIN_BATCH_NO_AUTOSTATE(2);
341 R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
342 R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);
343 END_BATCH();
344 COMMIT_BATCH();
345
346 (context->chipobj.FlushCmdBuffer)(context);
347
348 (context->chipobj.ReleaseArrays)(ctx);
349
350 //richard test
351 /* test stamp, write a number to mmSCRATCH4 */
352 BEGIN_BATCH_NO_AUTOSTATE(3);
353 R600_OUT_BATCH_REGVAL((0x2144 << 2), 0x56785678);
354 END_BATCH();
355 COMMIT_BATCH();
356 #endif //0
357 rcommonFlushCmdBuf( &context->radeon, __FUNCTION__ );
358
359 return GL_FALSE;
360 }
361
362 static GLboolean r700RunNonTCLRender(GLcontext * ctx,
363 struct tnl_pipeline_stage *stage) /* -------------------- */
364 {
365 GLboolean bRet = GL_TRUE;
366
367 return bRet;
368 }
369
370 static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
371 struct tnl_pipeline_stage *stage)
372 {
373 GLboolean bRet = GL_FALSE;
374
375 /* TODO : sw fallback */
376
377 /**
378 * Ensure all enabled and complete textures are uploaded along with any buffers being used.
379 */
380 if(!r700ValidateBuffers(ctx))
381 {
382 return GL_TRUE;
383 }
384
385 context_t *context = R700_CONTEXT(ctx);
386
387 r700UpdateShaders(ctx);
388
389 bRet = r700RunRender(ctx, stage);
390
391 return bRet;
392 //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline
393 //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success.
394 }
395
396 const struct tnl_pipeline_stage _r700_render_stage = {
397 "r700 Hardware Rasterization",
398 NULL,
399 NULL,
400 NULL,
401 NULL,
402 r700RunNonTCLRender
403 };
404
405 const struct tnl_pipeline_stage _r700_tcl_stage = {
406 "r700 Hardware Transform, Clipping and Lighting",
407 NULL,
408 NULL,
409 NULL,
410 NULL,
411 r700RunTCLRender
412 };
413
414 const struct tnl_pipeline_stage *r700_pipeline[] =
415 {
416 &_r700_tcl_stage,
417 &_tnl_vertex_transform_stage,
418 &_tnl_normal_transform_stage,
419 &_tnl_lighting_stage,
420 &_tnl_fog_coordinate_stage,
421 &_tnl_texgen_stage,
422 &_tnl_texture_transform_stage,
423 &_tnl_vertex_program_stage,
424
425 &_r700_render_stage,
426 &_tnl_render_stage,
427 0,
428 };
429
430