r300g: Use radeon compiler for fragment programs
[mesa.git] / src / mesa / drivers / dri / r600 / r700_render.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
26 */
27
28 #include "main/glheader.h"
29 #include "main/state.h"
30 #include "main/imports.h"
31 #include "main/enums.h"
32 #include "main/macros.h"
33 #include "main/context.h"
34 #include "main/dd.h"
35 #include "main/simple_list.h"
36 #include "main/api_arrayelt.h"
37 #include "swrast/swrast.h"
38 #include "swrast_setup/swrast_setup.h"
39 #include "vbo/vbo.h"
40
41 #include "tnl/tnl.h"
42 #include "tnl/t_vp_build.h"
43 #include "tnl/t_context.h"
44 #include "tnl/t_vertex.h"
45 #include "tnl/t_pipeline.h"
46
47 #include "radeon_mipmap_tree.h"
48 #include "r600_context.h"
49 #include "r600_cmdbuf.h"
50
51 #include "r600_tex.h"
52
53 #include "r700_vertprog.h"
54 #include "r700_fragprog.h"
55 #include "r700_state.h"
56
57 void r700WaitForIdle(context_t *context);
58 void r700WaitForIdleClean(context_t *context);
59 void r700Start3D(context_t *context);
60 GLboolean r700SendTextureState(context_t *context);
61 unsigned int r700PrimitiveType(int prim);
62 void r600UpdateTextureState(GLcontext * ctx);
63 GLboolean r700SyncSurf(context_t *context,
64 struct radeon_bo *pbo,
65 uint32_t read_domain,
66 uint32_t write_domain,
67 uint32_t sync_type);
68
/*
 * Stall the command processor until the 3D engine is idle.
 *
 * Emits a SET_CONFIG_REG write of WAIT_3D_IDLE_bit to the WAIT_UNTIL
 * register and flushes the batch to the hardware.
 */
void r700WaitForIdle(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    BEGIN_BATCH_NO_AUTOSTATE(3);

    /* WAIT_UNTIL is a config register; the index is relative to the
     * ASIC config register base. */
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(WAIT_3D_IDLE_bit);

    END_BATCH();
    COMMIT_BATCH();
}
81
/*
 * Flush/invalidate the render caches and wait for the 3D engine to go
 * fully idle (idle + idle-clean).
 *
 * First emits a CACHE_FLUSH_AND_INV event, then programs WAIT_UNTIL with
 * both WAIT_3D_IDLE and WAIT_3D_IDLECLEAN, and commits the batch.
 */
void r700WaitForIdleClean(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    BEGIN_BATCH_NO_AUTOSTATE(5);

    /* Kick a cache flush & invalidate event through the CP. */
    R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
    R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);

    /* Then wait for the engine to be idle AND the caches clean. */
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
    R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);

    END_BATCH();
    COMMIT_BATCH();
}
97
/*
 * Initialize the 3D engine at the start of a command stream.
 *
 * Pre-RV770 parts additionally require an explicit START_3D_CMDBUF
 * packet before any 3D state is emitted.  Afterwards a CONTEXT_CONTROL
 * packet is sent and the pipe is drained via r700WaitForIdleClean().
 */
void r700Start3D(context_t *context)
{
    BATCH_LOCALS(&context->radeon);
    /* R600-family chips (older than RV770) need START_3D_CMDBUF. */
    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
    {
        BEGIN_BATCH_NO_AUTOSTATE(2);
        R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
        R600_OUT_BATCH(0);
        END_BATCH();
    }

    /* CONTEXT_CONTROL with both dwords set to 0x80000000 — presumably
     * the load/shadow enable masks; TODO confirm against the R6xx CP
     * packet documentation. */
    BEGIN_BATCH_NO_AUTOSTATE(3);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
    R600_OUT_BATCH(0x80000000);
    R600_OUT_BATCH(0x80000000);
    END_BATCH();

    COMMIT_BATCH();

    /* Drain the pipe so the new context starts from a clean state. */
    r700WaitForIdleClean(context);
}
119
/*
 * Build the vertex and fragment shader hardware state for the current
 * GL state, then derive CB_SHADER_CONTROL from the fragment program's
 * export count.
 *
 * Always returns GL_TRUE.
 */
static GLboolean r700SetupShaders(GLcontext * ctx)
{
    context_t *context = R700_CONTEXT(ctx);

    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);

    GLuint exportCount;

    /* Reset the PS/VS resource registers; the per-program setup below
     * repopulates them. */
    r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0;
    r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;

    SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
    SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);

    r700SetupVertexProgram(ctx);

    r700SetupFragmentProgram(ctx);

    /* Extract the EXPORT_MODE field from SQ_PGM_EXPORTS_PS and set one
     * enable bit per export in CB_SHADER_CONTROL. */
    exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
    r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;

    return GL_TRUE;
}
143
/*
 * Emit the texture hardware state for every bound texture unit.
 *
 * For each unit with a texture object this sends:
 *   - a SURFACE_SYNC invalidating the texture cache for the backing BO,
 *   - a SET_RESOURCE packet (7 dwords; dwords 2 and 3 carry buffer
 *     relocations),
 *   - a SET_SAMPLER packet (3 dwords),
 *   - the four TD_PS_SAMPLERn border-color registers.
 *
 * Always returns GL_TRUE.
 */
GLboolean r700SendTextureState(context_t *context)
{
    unsigned int i;
    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
    offset_modifiers offset_mod = {NO_SHIFT, 0, 0xFFFFFFFF};
    struct radeon_bo *bo = NULL;
    BATCH_LOCALS(&context->radeon);

    for (i=0; i<R700_TEXTURE_NUMBERUNITS; i++) {
        radeonTexObj *t = r700->textures[i];
        if (t) {
            /* NOTE(review): assumes t->mt is non-NULL whenever
             * image_override is false — verify the miptree is always
             * allocated before this point. */
            if (!t->image_override)
                bo = t->mt->bo;
            else
                bo = t->bo;
            if (bo) {

                /* Invalidate the texture cache before the GPU samples
                 * from this buffer. */
                r700SyncSurf(context, bo,
                             RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
                             0, TC_ACTION_ENA_bit);

                /* Texture resource: 7 consecutive dwords per unit. */
                BEGIN_BATCH_NO_AUTOSTATE(9);
                R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
                R600_OUT_BATCH(i * 7);
                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0);
                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1);
                R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2,
                                     bo,
                                     0,
                                     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0, &offset_mod);
                /* NOTE(review): this reloc passes SQ_TEX_RESOURCE3 as the
                 * offset argument while the RESOURCE2 reloc passes 0 —
                 * confirm this asymmetry is intentional (mip base vs.
                 * image base). */
                R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3,
                                     bo,
                                     r700->textures[i]->SQ_TEX_RESOURCE3,
                                     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0, &offset_mod);
                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4);
                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5);
                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6);
                END_BATCH();

                /* Sampler state: 3 consecutive dwords per unit. */
                BEGIN_BATCH_NO_AUTOSTATE(5);
                R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
                R600_OUT_BATCH(i * 3);
                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0);
                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1);
                R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2);
                END_BATCH();

                /* Border color: 4 registers per sampler, 16-byte stride. */
                BEGIN_BATCH_NO_AUTOSTATE(2 + 4);
                R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4);
                R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED);
                R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN);
                R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE);
                R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA);
                END_BATCH();

                COMMIT_BATCH();
            }
        }
    }
    return GL_TRUE;
}
205
/*
 * Emit a SURFACE_SYNC packet flushing/invalidating the caches named by
 * sync_type for the given buffer object.
 *
 * pbo          - buffer to synchronize; its address is patched in via a
 *                relocation.
 * read_domain  - GEM read domain(s) for the relocation.
 * write_domain - GEM write domain for the relocation.
 * sync_type    - CP_COHER_CNTL bits selecting which caches to act on.
 *
 * Always returns GL_TRUE.
 */
GLboolean r700SyncSurf(context_t *context,
                       struct radeon_bo *pbo,
                       uint32_t read_domain,
                       uint32_t write_domain,
                       uint32_t sync_type)
{
    BATCH_LOCALS(&context->radeon);
    uint32_t cp_coher_size;
    offset_modifiers offset_mod;

    /* CP_COHER_SIZE is in 256-byte units; 0xffffffff means "whole
     * address space". */
    if (pbo->size == 0xffffffff)
        cp_coher_size = 0xffffffff;
    else
        cp_coher_size = ((pbo->size + 255) >> 8);

    offset_mod.shift = NO_SHIFT;
    offset_mod.shiftbits = 0;
    offset_mod.mask = 0xFFFFFFFF;

    BEGIN_BATCH_NO_AUTOSTATE(5);
    R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
    R600_OUT_BATCH(sync_type);
    R600_OUT_BATCH(cp_coher_size);
    /* Base address dword: value 0, patched by the relocation. */
    R600_OUT_BATCH_RELOC(0,
                         pbo,
                         0,
                         read_domain, write_domain, 0, &offset_mod); // ???
    /* Final dword: presumably the CP poll interval — TODO confirm the
     * magic value 10 against the R6xx SURFACE_SYNC packet definition. */
    R600_OUT_BATCH(10);

    END_BATCH();
    COMMIT_BATCH();

    return GL_TRUE;
}
240
241 unsigned int r700PrimitiveType(int prim)
242 {
243 switch (prim & PRIM_MODE_MASK)
244 {
245 case GL_POINTS:
246 return DI_PT_POINTLIST;
247 break;
248 case GL_LINES:
249 return DI_PT_LINELIST;
250 break;
251 case GL_LINE_STRIP:
252 return DI_PT_LINESTRIP;
253 break;
254 case GL_LINE_LOOP:
255 return DI_PT_LINELOOP;
256 break;
257 case GL_TRIANGLES:
258 return DI_PT_TRILIST;
259 break;
260 case GL_TRIANGLE_STRIP:
261 return DI_PT_TRISTRIP;
262 break;
263 case GL_TRIANGLE_FAN:
264 return DI_PT_TRIFAN;
265 break;
266 case GL_QUADS:
267 return DI_PT_QUADLIST;
268 break;
269 case GL_QUAD_STRIP:
270 return DI_PT_QUADSTRIP;
271 break;
272 case GL_POLYGON:
273 return DI_PT_POLYGON;
274 break;
275 default:
276 assert(0);
277 return -1;
278 break;
279 }
280 }
281
/*
 * Render the current TNL vertex buffer on the hardware.
 *
 * Emits the full state setup (SQ config, shaders, scissor, render and
 * depth targets, textures, viewport, UCP, context registers), then one
 * DRAW_INDEX_IMMD packet per primitive with sequentially generated
 * indices, flushes the pipe and submits the command buffer.
 *
 * Returns GL_FALSE on success (no further pipeline stages should run);
 * returns GL_TRUE if stream setup failed, letting the software
 * fallback stages take over — presumably; confirm against
 * _tnl_run_pipeline semantics.
 */
static GLboolean r700RunRender(GLcontext * ctx,
                               struct tnl_pipeline_stage *stage)
{
    context_t *context = R700_CONTEXT(ctx);
    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
    int lastIndex = 0;   /* running base for the generated index stream */
    BATCH_LOCALS(&context->radeon);

    unsigned int i, j;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *vb = &tnl->vb;

    r700Start3D(context); /* TODO : this is too much. */

    r700SendSQConfig(context);

    r700UpdateShaders(ctx);

    r700SetScissor(context);
    r700SetRenderTarget(context, 0);
    r700SetDepthTarget(context);

    /* Non-zero from r700SetupStreams means the vertex streams could not
     * be set up; bail out with GL_TRUE so later stages still run. */
    if(r700SetupStreams(ctx))
    {
        return GL_TRUE;
    }

    r600UpdateTextureState(ctx);
    r700SendTextureState(context);

    r700SetupShaders(ctx);

    r700SendFSState(context); // FIXME just a place holder for now
    r700SendPSState(context);
    r700SendVSState(context);

    r700SendUCPState(context);
    r700SendContextStates(context);
    r700SendViewportState(context, 0);
    r700SendRenderTargetState(context, 0);
    r700SendDepthTargetState(context);

    /* richard test code */
    /* One draw packet per TNL primitive. */
    for (i = 0; i < vb->PrimitiveCount; i++)
    {
        GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
        GLuint start = vb->Primitive[i].start;
        GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
        GLuint numIndices = vb->Primitive[i].count;
        GLuint numEntires;   /* dword count for this batch (sic) */

        unsigned int VGT_DRAW_INITIATOR = 0;
        unsigned int VGT_INDEX_TYPE = 0;
        unsigned int VGT_PRIMITIVE_TYPE = 0;
        unsigned int VGT_NUM_INDICES = 0;

        /* Skip empty primitives. */
        if (numIndices < 1)
            continue;

        numEntires = 3 /* VGT_PRIMITIVE_TYPE */
                     + 2 /* VGT_INDEX_TYPE */
                     + 2 /* NUM_INSTANCES */
                     + numIndices + 3; /* DRAW_INDEX_IMMD */

        BEGIN_BATCH_NO_AUTOSTATE(numEntires);

        // prim
        VGT_PRIMITIVE_TYPE |= r700PrimitiveType(prim) << VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift;
        R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
        R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
        R600_OUT_BATCH(VGT_PRIMITIVE_TYPE);

        // index type
        VGT_INDEX_TYPE |= DI_INDEX_SIZE_32_BIT << INDEX_TYPE_shift;
        R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
        R600_OUT_BATCH(VGT_INDEX_TYPE);

        // num instances
        R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
        R600_OUT_BATCH(1);

        // draw packet
        VGT_NUM_INDICES = numIndices;
        VGT_DRAW_INITIATOR |= DI_SRC_SEL_IMMEDIATE << SOURCE_SELECT_shift;
        VGT_DRAW_INITIATOR |= DI_MAJOR_MODE_0 << MAJOR_MODE_shift;

        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (numIndices + 1)));
        R600_OUT_BATCH(VGT_NUM_INDICES);
        R600_OUT_BATCH(VGT_DRAW_INITIATOR);

        /* Immediate index data: consecutive indices starting where the
         * previous primitive left off. */
        for (j = lastIndex; j < lastIndex + numIndices; j++)
        {
            R600_OUT_BATCH(j);
        }
        lastIndex += numIndices;

        END_BATCH();
        COMMIT_BATCH();
    }

    /* Flush render op cached for last several quads. */
    r700WaitForIdleClean(context);

    radeonReleaseArrays(ctx, 0);

    rcommonFlushCmdBuf( &context->radeon, __FUNCTION__ );

    return GL_FALSE;
}
391
392 static GLboolean r700RunNonTCLRender(GLcontext * ctx,
393 struct tnl_pipeline_stage *stage) /* -------------------- */
394 {
395 GLboolean bRet = GL_TRUE;
396
397 return bRet;
398 }
399
400 static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/
401 struct tnl_pipeline_stage *stage)
402 {
403 GLboolean bRet = GL_FALSE;
404
405 /* TODO : sw fallback */
406
407 /**
408 * Ensure all enabled and complete textures are uploaded along with any buffers being used.
409 */
410 if(!r600ValidateBuffers(ctx))
411 {
412 return GL_TRUE;
413 }
414
415 bRet = r700RunRender(ctx, stage);
416
417 return bRet;
418 //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline
419 //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success.
420 }
421
/* TNL pipeline stage for the non-TCL rasterization path; only the run
 * hook (r700RunNonTCLRender) is provided. */
const struct tnl_pipeline_stage _r700_render_stage = {
    "r700 Hardware Rasterization",
    NULL,
    NULL,
    NULL,
    NULL,
    r700RunNonTCLRender
};
430
/* TNL pipeline stage for the hardware TCL path; only the run hook
 * (r700RunTCLRender) is provided. */
const struct tnl_pipeline_stage _r700_tcl_stage = {
    "r700 Hardware Transform, Clipping and Lighting",
    NULL,
    NULL,
    NULL,
    NULL,
    r700RunTCLRender
};
439
/* TNL pipeline for r700: the hardware TCL stage runs first; the
 * generic software transform/lighting/texgen stages and the software
 * render stage follow as fallbacks.  NULL-terminated. */
const struct tnl_pipeline_stage *r700_pipeline[] =
{
    &_r700_tcl_stage,
    &_tnl_vertex_transform_stage,
    &_tnl_normal_transform_stage,
    &_tnl_lighting_stage,
    &_tnl_fog_coordinate_stage,
    &_tnl_texgen_stage,
    &_tnl_texture_transform_stage,
    &_tnl_vertex_program_stage,

    &_r700_render_stage,
    &_tnl_render_stage,
    0,
};
455
456