4753c757a1383c9f7a0d68aa4d8004e2bd2613b9
[mesa.git] / src / mesa / drivers / dri / r600 / r700_render.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include "main/glheader.h"
29 #include "main/state.h"
30 #include "main/imports.h"
31 #include "main/enums.h"
32 #include "main/macros.h"
33 #include "main/context.h"
34 #include "main/dd.h"
35 #include "main/simple_list.h"
36 #include "main/api_arrayelt.h"
37 #include "swrast/swrast.h"
38 #include "swrast_setup/swrast_setup.h"
39 #include "vbo/vbo.h"
40
41 #include "tnl/tnl.h"
42 #include "tnl/t_vp_build.h"
43 #include "tnl/t_context.h"
44 #include "tnl/t_vertex.h"
45 #include "tnl/t_pipeline.h"
46
47 #include "r600_context.h"
48 #include "r600_cmdbuf.h"
49
50 #include "r600_tex.h"
51
52 #include "r700_vertprog.h"
53 #include "r700_fragprog.h"
54 #include "r700_state.h"
55
56 #include "radeon_common_context.h"
57
58 void r700WaitForIdle(context_t *context);
59 void r700WaitForIdleClean(context_t *context);
60 GLboolean r700SendTextureState(context_t *context);
61 static unsigned int r700PrimitiveType(int prim);
62 void r600UpdateTextureState(GLcontext * ctx);
63 GLboolean r700SyncSurf(context_t *context,
64 struct radeon_bo *pbo,
65 uint32_t read_domain,
66 uint32_t write_domain,
67 uint32_t sync_type);
68
69 void r700WaitForIdle(context_t *context)
70 {
71 BATCH_LOCALS(&context->radeon);
72 BEGIN_BATCH_NO_AUTOSTATE(3);
73
74 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
75 R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
76 R600_OUT_BATCH(WAIT_3D_IDLE_bit);
77
78 END_BATCH();
79 COMMIT_BATCH();
80 }
81
82 void r700WaitForIdleClean(context_t *context)
83 {
84 BATCH_LOCALS(&context->radeon);
85 BEGIN_BATCH_NO_AUTOSTATE(5);
86
87 R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
88 R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);
89
90 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
91 R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
92 R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
93
94 END_BATCH();
95 COMMIT_BATCH();
96 }
97
98 void r700Start3D(context_t *context)
99 {
100 BATCH_LOCALS(&context->radeon);
101 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
102 {
103 BEGIN_BATCH_NO_AUTOSTATE(2);
104 R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
105 R600_OUT_BATCH(0);
106 END_BATCH();
107 }
108
109 BEGIN_BATCH_NO_AUTOSTATE(3);
110 R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
111 R600_OUT_BATCH(0x80000000);
112 R600_OUT_BATCH(0x80000000);
113 END_BATCH();
114
115 COMMIT_BATCH();
116
117 r700WaitForIdleClean(context);
118 }
119
120 GLboolean r700SyncSurf(context_t *context,
121 struct radeon_bo *pbo,
122 uint32_t read_domain,
123 uint32_t write_domain,
124 uint32_t sync_type)
125 {
126 BATCH_LOCALS(&context->radeon);
127 uint32_t cp_coher_size;
128
129 if (!pbo)
130 return GL_FALSE;
131
132 if (pbo->size == 0xffffffff)
133 cp_coher_size = 0xffffffff;
134 else
135 cp_coher_size = ((pbo->size + 255) >> 8);
136
137 BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
138 R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
139 R600_OUT_BATCH(sync_type);
140 R600_OUT_BATCH(cp_coher_size);
141 R600_OUT_BATCH(0);
142 R600_OUT_BATCH(10);
143 R600_OUT_BATCH_RELOC(0,
144 pbo,
145 0,
146 read_domain, write_domain, 0);
147 END_BATCH();
148 COMMIT_BATCH();
149
150 return GL_TRUE;
151 }
152
153 static unsigned int r700PrimitiveType(int prim)
154 {
155 switch (prim & PRIM_MODE_MASK)
156 {
157 case GL_POINTS:
158 return DI_PT_POINTLIST;
159 break;
160 case GL_LINES:
161 return DI_PT_LINELIST;
162 break;
163 case GL_LINE_STRIP:
164 return DI_PT_LINESTRIP;
165 break;
166 case GL_LINE_LOOP:
167 return DI_PT_LINELOOP;
168 break;
169 case GL_TRIANGLES:
170 return DI_PT_TRILIST;
171 break;
172 case GL_TRIANGLE_STRIP:
173 return DI_PT_TRISTRIP;
174 break;
175 case GL_TRIANGLE_FAN:
176 return DI_PT_TRIFAN;
177 break;
178 case GL_QUADS:
179 return DI_PT_QUADLIST;
180 break;
181 case GL_QUAD_STRIP:
182 return DI_PT_QUADSTRIP;
183 break;
184 case GL_POLYGON:
185 return DI_PT_POLYGON;
186 break;
187 default:
188 assert(0);
189 return -1;
190 break;
191 }
192 }
193
194 static int r700NumVerts(int num_verts, int prim)
195 {
196 int verts_off = 0;
197
198 switch (prim & PRIM_MODE_MASK) {
199 case GL_POINTS:
200 verts_off = 0;
201 break;
202 case GL_LINES:
203 verts_off = num_verts % 2;
204 break;
205 case GL_LINE_STRIP:
206 if (num_verts < 2)
207 verts_off = num_verts;
208 break;
209 case GL_LINE_LOOP:
210 if (num_verts < 2)
211 verts_off = num_verts;
212 break;
213 case GL_TRIANGLES:
214 verts_off = num_verts % 3;
215 break;
216 case GL_TRIANGLE_STRIP:
217 if (num_verts < 3)
218 verts_off = num_verts;
219 break;
220 case GL_TRIANGLE_FAN:
221 if (num_verts < 3)
222 verts_off = num_verts;
223 break;
224 case GL_QUADS:
225 verts_off = num_verts % 4;
226 break;
227 case GL_QUAD_STRIP:
228 if (num_verts < 4)
229 verts_off = num_verts;
230 else
231 verts_off = num_verts % 2;
232 break;
233 case GL_POLYGON:
234 if (num_verts < 3)
235 verts_off = num_verts;
236 break;
237 default:
238 assert(0);
239 return -1;
240 break;
241 }
242
243 return num_verts - verts_off;
244 }
245
246 static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
247 {
248 context_t *context = R700_CONTEXT(ctx);
249 BATCH_LOCALS(&context->radeon);
250 int type, i, total_emit;
251 int num_indices;
252 uint32_t vgt_draw_initiator = 0;
253 uint32_t vgt_index_type = 0;
254 uint32_t vgt_primitive_type = 0;
255 uint32_t vgt_num_indices = 0;
256
257 type = r700PrimitiveType(prim);
258 num_indices = r700NumVerts(end - start, prim);
259
260 if (type < 0 || num_indices <= 0)
261 return;
262
263 total_emit = 3 /* VGT_PRIMITIVE_TYPE */
264 + 2 /* VGT_INDEX_TYPE */
265 + 2 /* NUM_INSTANCES */
266 + num_indices + 3; /* DRAW_INDEX_IMMD */
267
268 BEGIN_BATCH_NO_AUTOSTATE(total_emit);
269 // prim
270 SETfield(vgt_primitive_type, type,
271 VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
272 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
273 R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
274 R600_OUT_BATCH(vgt_primitive_type);
275
276 // index type
277 SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
278 R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
279 R600_OUT_BATCH(vgt_index_type);
280
281 // num instances
282 R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
283 R600_OUT_BATCH(1);
284
285 // draw packet
286 vgt_num_indices = num_indices;
287 SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
288 SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
289
290 R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
291 R600_OUT_BATCH(vgt_num_indices);
292 R600_OUT_BATCH(vgt_draw_initiator);
293
294 for (i = start; i < (start + num_indices); i++) {
295 R600_OUT_BATCH(i);
296 }
297 END_BATCH();
298 COMMIT_BATCH();
299
300 }
301
/*
 * Fixed dword budget added on top of the per-primitive estimate:
 * start 3d (10), idle (5), cb/db flush (14).
 *
 * The sum is parenthesized so the macro behaves as a single operand
 * wherever it expands (e.g. next to `*` or a unary/binary minus).
 */
#define PRE_EMIT_STATE_BUFSZ (10 + 5 + 14)
304
305 static GLuint r700PredictRenderSize(GLcontext* ctx)
306 {
307 context_t *context = R700_CONTEXT(ctx);
308 TNLcontext *tnl = TNL_CONTEXT(ctx);
309 struct r700_vertex_program *vpc
310 = (struct r700_vertex_program *)ctx->VertexProgram._Current;
311 struct vertex_buffer *vb = &tnl->vb;
312 GLboolean flushed;
313 GLuint dwords, i;
314 GLuint state_size;
315 /* pre calculate aos count so state prediction works */
316 context->radeon.tcl.aos_count = _mesa_bitcount(vpc->mesa_program.Base.InputsRead);
317
318 dwords = PRE_EMIT_STATE_BUFSZ;
319 for (i = 0; i < vb->PrimitiveCount; i++)
320 dwords += vb->Primitive[i].count + 10;
321 state_size = radeonCountStateEmitSize(&context->radeon);
322 flushed = rcommonEnsureCmdBufSpace(&context->radeon,
323 dwords + state_size, __FUNCTION__);
324
325 if (flushed)
326 dwords += radeonCountStateEmitSize(&context->radeon);
327 else
328 dwords += state_size;
329
330 radeon_print(RADEON_RENDER, RADEON_VERBOSE,
331 "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
332 return dwords;
333 }
334
335 static GLboolean r700RunRender(GLcontext * ctx,
336 struct tnl_pipeline_stage *stage)
337 {
338 context_t *context = R700_CONTEXT(ctx);
339 radeonContextPtr radeon = &context->radeon;
340 unsigned int i, id = 0;
341 TNLcontext *tnl = TNL_CONTEXT(ctx);
342 struct vertex_buffer *vb = &tnl->vb;
343 struct radeon_renderbuffer *rrb;
344
345 radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n",
346 __func__, context->radeon.cmdbuf.cs->cdw);
347
348 /* always emit CB base to prevent
349 * lock ups on some chips.
350 */
351 R600_STATECHANGE(context, cb_target);
352 /* mark vtx as dirty since it changes per-draw */
353 R600_STATECHANGE(context, vtx);
354
355 r700UpdateShaders(ctx);
356 r700SetScissor(context);
357 r700SetupVertexProgram(ctx);
358 r700SetupFragmentProgram(ctx);
359 r600UpdateTextureState(ctx);
360
361 GLuint emit_end = r700PredictRenderSize(ctx)
362 + context->radeon.cmdbuf.cs->cdw;
363 r700SetupStreams(ctx);
364
365 radeonEmitState(radeon);
366
367 /* richard test code */
368 for (i = 0; i < vb->PrimitiveCount; i++) {
369 GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
370 GLuint start = vb->Primitive[i].start;
371 GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
372 r700RunRenderPrimitive(ctx, start, end, prim);
373 }
374
375 /* Flush render op cached for last several quads. */
376 r700WaitForIdleClean(context);
377
378 rrb = radeon_get_colorbuffer(&context->radeon);
379 if (rrb && rrb->bo)
380 r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
381 CB_ACTION_ENA_bit | (1 << (id + 6)));
382
383 rrb = radeon_get_depthbuffer(&context->radeon);
384 if (rrb && rrb->bo)
385 r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
386 DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
387
388 radeonReleaseArrays(ctx, ~0);
389
390 radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n",
391 __func__, context->radeon.cmdbuf.cs->cdw);
392
393 if ( emit_end < context->radeon.cmdbuf.cs->cdw )
394 WARN_ONCE("Rendering was %d commands larger than predicted size."
395 " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
396
397 return GL_FALSE;
398 }
399
400 static GLboolean r700RunNonTCLRender(GLcontext * ctx,
401 struct tnl_pipeline_stage *stage) /* -------------------- */
402 {
403 GLboolean bRet = GL_TRUE;
404
405 return bRet;
406 }
407
408 static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
409 struct tnl_pipeline_stage *stage)
410 {
411 GLboolean bRet = GL_FALSE;
412
413 /* TODO : sw fallback */
414
415 /**
416 * Ensure all enabled and complete textures are uploaded along with any buffers being used.
417 */
418 if(!r600ValidateBuffers(ctx))
419 {
420 return GL_TRUE;
421 }
422
423 bRet = r700RunRender(ctx, stage);
424
425 return bRet;
426 //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline
427 //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success.
428 }
429
430 const struct tnl_pipeline_stage _r700_render_stage = {
431 "r700 Hardware Rasterization",
432 NULL,
433 NULL,
434 NULL,
435 NULL,
436 r700RunNonTCLRender
437 };
438
439 const struct tnl_pipeline_stage _r700_tcl_stage = {
440 "r700 Hardware Transform, Clipping and Lighting",
441 NULL,
442 NULL,
443 NULL,
444 NULL,
445 r700RunTCLRender
446 };
447
448 const struct tnl_pipeline_stage *r700_pipeline[] =
449 {
450 &_r700_tcl_stage,
451 &_tnl_vertex_transform_stage,
452 &_tnl_normal_transform_stage,
453 &_tnl_lighting_stage,
454 &_tnl_fog_coordinate_stage,
455 &_tnl_texgen_stage,
456 &_tnl_texture_transform_stage,
457 &_tnl_vertex_program_stage,
458
459 &_r700_render_stage,
460 &_tnl_render_stage,
461 0,
462 };
463
464