r600: convert to using common radeon state atoms
[mesa.git] / src / mesa / drivers / dri / r600 / r700_render.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include "main/glheader.h"
29 #include "main/state.h"
30 #include "main/imports.h"
31 #include "main/enums.h"
32 #include "main/macros.h"
33 #include "main/context.h"
34 #include "main/dd.h"
35 #include "main/simple_list.h"
36 #include "main/api_arrayelt.h"
37 #include "swrast/swrast.h"
38 #include "swrast_setup/swrast_setup.h"
39 #include "vbo/vbo.h"
40
41 #include "tnl/tnl.h"
42 #include "tnl/t_vp_build.h"
43 #include "tnl/t_context.h"
44 #include "tnl/t_vertex.h"
45 #include "tnl/t_pipeline.h"
46
47 #include "r600_context.h"
48 #include "r600_cmdbuf.h"
49
50 #include "r600_tex.h"
51
52 #include "r700_vertprog.h"
53 #include "r700_fragprog.h"
54 #include "r700_state.h"
55
56 void r700WaitForIdle(context_t *context);
57 void r700WaitForIdleClean(context_t *context);
58 void r700Start3D(context_t *context);
59 GLboolean r700SendTextureState(context_t *context);
60 static unsigned int r700PrimitiveType(int prim);
61 void r600UpdateTextureState(GLcontext * ctx);
62 GLboolean r700SyncSurf(context_t *context,
63 struct radeon_bo *pbo,
64 uint32_t read_domain,
65 uint32_t write_domain,
66 uint32_t sync_type);
67
68 void r700WaitForIdle(context_t *context)
69 {
70 BATCH_LOCALS(&context->radeon);
71 BEGIN_BATCH_NO_AUTOSTATE(3);
72
73 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
74 R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
75 R600_OUT_BATCH(WAIT_3D_IDLE_bit);
76
77 END_BATCH();
78 COMMIT_BATCH();
79 }
80
81 void r700WaitForIdleClean(context_t *context)
82 {
83 BATCH_LOCALS(&context->radeon);
84 BEGIN_BATCH_NO_AUTOSTATE(5);
85
86 R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
87 R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);
88
89 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
90 R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
91 R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
92
93 END_BATCH();
94 COMMIT_BATCH();
95 }
96
97 void r700Start3D(context_t *context)
98 {
99 BATCH_LOCALS(&context->radeon);
100 if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
101 {
102 BEGIN_BATCH_NO_AUTOSTATE(2);
103 R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
104 R600_OUT_BATCH(0);
105 END_BATCH();
106 }
107
108 BEGIN_BATCH_NO_AUTOSTATE(3);
109 R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
110 R600_OUT_BATCH(0x80000000);
111 R600_OUT_BATCH(0x80000000);
112 END_BATCH();
113
114 COMMIT_BATCH();
115
116 r700WaitForIdleClean(context);
117 }
118
119 static GLboolean r700SetupShaders(GLcontext * ctx)
120 {
121 context_t *context = R700_CONTEXT(ctx);
122
123 R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
124
125 GLuint exportCount;
126
127 r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0;
128 r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
129
130 SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
131 SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
132
133 r700SetupVertexProgram(ctx);
134
135 r700SetupFragmentProgram(ctx);
136
137 exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
138 r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
139
140 r600UpdateTextureState(ctx);
141
142 r700SendFSState(context); // FIXME just a place holder for now
143 r700SendPSState(context);
144 r700SendVSState(context);
145
146 r700SendTextureState(context);
147 r700SetupStreams(ctx);
148
149 return GL_TRUE;
150 }
151
152 GLboolean r700SyncSurf(context_t *context,
153 struct radeon_bo *pbo,
154 uint32_t read_domain,
155 uint32_t write_domain,
156 uint32_t sync_type)
157 {
158 BATCH_LOCALS(&context->radeon);
159 uint32_t cp_coher_size;
160
161 if (!pbo)
162 return GL_FALSE;
163
164 if (pbo->size == 0xffffffff)
165 cp_coher_size = 0xffffffff;
166 else
167 cp_coher_size = ((pbo->size + 255) >> 8);
168
169 BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
170 R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
171 R600_OUT_BATCH(sync_type);
172 R600_OUT_BATCH(cp_coher_size);
173 R600_OUT_BATCH(0);
174 R600_OUT_BATCH(10);
175 R600_OUT_BATCH_RELOC(0,
176 pbo,
177 0,
178 read_domain, write_domain, 0); // ???
179
180 END_BATCH();
181 COMMIT_BATCH();
182
183 return GL_TRUE;
184 }
185
186 static unsigned int r700PrimitiveType(int prim)
187 {
188 switch (prim & PRIM_MODE_MASK)
189 {
190 case GL_POINTS:
191 return DI_PT_POINTLIST;
192 break;
193 case GL_LINES:
194 return DI_PT_LINELIST;
195 break;
196 case GL_LINE_STRIP:
197 return DI_PT_LINESTRIP;
198 break;
199 case GL_LINE_LOOP:
200 return DI_PT_LINELOOP;
201 break;
202 case GL_TRIANGLES:
203 return DI_PT_TRILIST;
204 break;
205 case GL_TRIANGLE_STRIP:
206 return DI_PT_TRISTRIP;
207 break;
208 case GL_TRIANGLE_FAN:
209 return DI_PT_TRIFAN;
210 break;
211 case GL_QUADS:
212 return DI_PT_QUADLIST;
213 break;
214 case GL_QUAD_STRIP:
215 return DI_PT_QUADSTRIP;
216 break;
217 case GL_POLYGON:
218 return DI_PT_POLYGON;
219 break;
220 default:
221 assert(0);
222 return -1;
223 break;
224 }
225 }
226
227 static int r700NumVerts(int num_verts, int prim)
228 {
229 int verts_off = 0;
230
231 switch (prim & PRIM_MODE_MASK) {
232 case GL_POINTS:
233 verts_off = 0;
234 break;
235 case GL_LINES:
236 verts_off = num_verts % 2;
237 break;
238 case GL_LINE_STRIP:
239 if (num_verts < 2)
240 verts_off = num_verts;
241 break;
242 case GL_LINE_LOOP:
243 if (num_verts < 2)
244 verts_off = num_verts;
245 break;
246 case GL_TRIANGLES:
247 verts_off = num_verts % 3;
248 break;
249 case GL_TRIANGLE_STRIP:
250 if (num_verts < 3)
251 verts_off = num_verts;
252 break;
253 case GL_TRIANGLE_FAN:
254 if (num_verts < 3)
255 verts_off = num_verts;
256 break;
257 case GL_QUADS:
258 verts_off = num_verts % 4;
259 break;
260 case GL_QUAD_STRIP:
261 if (num_verts < 4)
262 verts_off = num_verts;
263 else
264 verts_off = num_verts % 2;
265 break;
266 case GL_POLYGON:
267 if (num_verts < 3)
268 verts_off = num_verts;
269 break;
270 default:
271 assert(0);
272 return -1;
273 break;
274 }
275
276 return num_verts - verts_off;
277 }
278
279 static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
280 {
281 context_t *context = R700_CONTEXT(ctx);
282 BATCH_LOCALS(&context->radeon);
283 int type, i, total_emit;
284 int num_indices;
285 uint32_t vgt_draw_initiator = 0;
286 uint32_t vgt_index_type = 0;
287 uint32_t vgt_primitive_type = 0;
288 uint32_t vgt_num_indices = 0;
289
290 type = r700PrimitiveType(prim);
291 num_indices = r700NumVerts(end - start, prim);
292
293 if (type < 0 || num_indices <= 0)
294 return;
295
296 total_emit = 3 /* VGT_PRIMITIVE_TYPE */
297 + 2 /* VGT_INDEX_TYPE */
298 + 2 /* NUM_INSTANCES */
299 + num_indices + 3; /* DRAW_INDEX_IMMD */
300
301 BEGIN_BATCH_NO_AUTOSTATE(total_emit);
302 // prim
303 SETfield(vgt_primitive_type, type,
304 VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
305 R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
306 R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
307 R600_OUT_BATCH(vgt_primitive_type);
308
309 // index type
310 SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
311 R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
312 R600_OUT_BATCH(vgt_index_type);
313
314 // num instances
315 R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
316 R600_OUT_BATCH(1);
317
318 // draw packet
319 vgt_num_indices = num_indices;
320 SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
321 SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
322
323 R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
324 R600_OUT_BATCH(vgt_num_indices);
325 R600_OUT_BATCH(vgt_draw_initiator);
326
327 for (i = start; i < (start + num_indices); i++) {
328 R600_OUT_BATCH(i);
329 }
330 END_BATCH();
331 COMMIT_BATCH();
332
333 }
334
335 static GLboolean r700RunRender(GLcontext * ctx,
336 struct tnl_pipeline_stage *stage)
337 {
338 context_t *context = R700_CONTEXT(ctx);
339 radeonContextPtr radeon = &context->radeon;
340 unsigned int i, ind_count = 0, id = 0;
341 TNLcontext *tnl = TNL_CONTEXT(ctx);
342 struct vertex_buffer *vb = &tnl->vb;
343 struct radeon_renderbuffer *rrb;
344
345 for (i = 0; i < vb->PrimitiveCount; i++)
346 ind_count += vb->Primitive[i].count + 10;
347
348 /* just an estimate, need to properly calculate this */
349 rcommonEnsureCmdBufSpace(&context->radeon,
350 radeon->hw.max_state_size + ind_count + 1000, __FUNCTION__);
351
352 r700Start3D(context);
353 r700UpdateShaders(ctx);
354 r700SetScissor(context);
355 r700SetupShaders(ctx);
356 radeonEmitState(radeon);
357
358 /* richard test code */
359 for (i = 0; i < vb->PrimitiveCount; i++) {
360 GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
361 GLuint start = vb->Primitive[i].start;
362 GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
363 r700RunRenderPrimitive(ctx, start, end, prim);
364 }
365
366 /* Flush render op cached for last several quads. */
367 r700WaitForIdleClean(context);
368
369 rrb = radeon_get_colorbuffer(&context->radeon);
370 if (!rrb || !rrb->bo)
371 r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
372 CB_ACTION_ENA_bit | (1 << (id + 6)));
373
374 rrb = radeon_get_depthbuffer(&context->radeon);
375 if (!rrb || !rrb->bo)
376 r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
377 DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
378
379 radeonReleaseArrays(ctx, ~0);
380
381 return GL_FALSE;
382 }
383
384 static GLboolean r700RunNonTCLRender(GLcontext * ctx,
385 struct tnl_pipeline_stage *stage) /* -------------------- */
386 {
387 GLboolean bRet = GL_TRUE;
388
389 return bRet;
390 }
391
392 static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
393 struct tnl_pipeline_stage *stage)
394 {
395 GLboolean bRet = GL_FALSE;
396
397 /* TODO : sw fallback */
398
399 /**
400 * Ensure all enabled and complete textures are uploaded along with any buffers being used.
401 */
402 if(!r600ValidateBuffers(ctx))
403 {
404 return GL_TRUE;
405 }
406
407 bRet = r700RunRender(ctx, stage);
408
409 return bRet;
410 //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline
411 //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success.
412 }
413
414 const struct tnl_pipeline_stage _r700_render_stage = {
415 "r700 Hardware Rasterization",
416 NULL,
417 NULL,
418 NULL,
419 NULL,
420 r700RunNonTCLRender
421 };
422
423 const struct tnl_pipeline_stage _r700_tcl_stage = {
424 "r700 Hardware Transform, Clipping and Lighting",
425 NULL,
426 NULL,
427 NULL,
428 NULL,
429 r700RunTCLRender
430 };
431
432 const struct tnl_pipeline_stage *r700_pipeline[] =
433 {
434 &_r700_tcl_stage,
435 &_tnl_vertex_transform_stage,
436 &_tnl_normal_transform_stage,
437 &_tnl_lighting_stage,
438 &_tnl_fog_coordinate_stage,
439 &_tnl_texgen_stage,
440 &_tnl_texture_transform_stage,
441 &_tnl_vertex_program_stage,
442
443 &_r700_render_stage,
444 &_tnl_render_stage,
445 0,
446 };
447
448