radeon: make DRI1 one work with new CS mechanism
[mesa.git] / src / mesa / drivers / dri / r300 / r300_emit.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * \file
32 *
33 * \author Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "main/glheader.h"
37 #include "main/mtypes.h"
38 #include "main/colormac.h"
39 #include "main/imports.h"
40 #include "main/macros.h"
41 #include "main/image.h"
42
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
45 #include "tnl/tnl.h"
46 #include "tnl/t_context.h"
47
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
53
54
55 #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
56 SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
57 SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
58 SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
59 SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
60 SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
61 #error Cannot change these!
62 #endif
63
64 #define DEBUG_ALL DEBUG_VERTS
65
/*
 * COPY_DWORDS(dst, src, nr)
 *
 * Copy nr 32-bit words from src to dst and leave dst pointing just past
 * the last word written.
 *
 *   dst  modifiable pointer lvalue to 32-bit elements; it is advanced by nr
 *   src  pointer to the first source word (any pointer type)
 *   nr   number of words to copy
 *
 * Both variants are wrapped in do { } while (0) so the macro behaves like
 * a single statement.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int copy_tmp_;							\
	/* "memory" clobber: the string move reads and writes memory	\
	 * that is not named by any operand. */				\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (copy_tmp_), "=D" (dst), "=S" (copy_tmp_)	\
			      : "0" (nr),				\
			        "D" ((long)(dst)),			\
			        "S" ((long)(src))			\
			      : "memory" );				\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	/* Evaluate src and nr exactly once, and use names that cannot	\
	 * collide with a caller's own loop counter. */			\
	const int *copy_src_ = (const int *)(src);			\
	const int copy_nr_ = (nr);					\
	int copy_i_;							\
	for ( copy_i_ = 0 ; copy_i_ < copy_nr_ ; copy_i_++ )		\
		(dst)[copy_i_] = copy_src_[copy_i_];			\
	(dst) += copy_nr_;						\
} while (0)
#endif
85
86 static void r300EmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
87 {
88 int i;
89
90 if (RADEON_DEBUG & DEBUG_VERTS)
91 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
92 __FUNCTION__, count, stride, (void *)out, (void *)data);
93
94 if (stride == 4)
95 COPY_DWORDS(out, data, count);
96 else
97 for (i = 0; i < count; i++) {
98 out[0] = *(int *)data;
99 out++;
100 data += stride;
101 }
102 }
103
104 static void r300EmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
105 {
106 int i;
107
108 if (RADEON_DEBUG & DEBUG_VERTS)
109 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
110 __FUNCTION__, count, stride, (void *)out, (void *)data);
111
112 if (stride == 8)
113 COPY_DWORDS(out, data, count * 2);
114 else
115 for (i = 0; i < count; i++) {
116 out[0] = *(int *)data;
117 out[1] = *(int *)(data + 4);
118 out += 2;
119 data += stride;
120 }
121 }
122
123 static void r300EmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
124 {
125 int i;
126
127 if (RADEON_DEBUG & DEBUG_VERTS)
128 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
129 __FUNCTION__, count, stride, (void *)out, (void *)data);
130
131 if (stride == 12) {
132 COPY_DWORDS(out, data, count * 3);
133 }
134 else
135 for (i = 0; i < count; i++) {
136 out[0] = *(int *)data;
137 out[1] = *(int *)(data + 4);
138 out[2] = *(int *)(data + 8);
139 out += 3;
140 data += stride;
141 }
142 }
143
144 static void r300EmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
145 {
146 int i;
147
148 if (RADEON_DEBUG & DEBUG_VERTS)
149 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
150 __FUNCTION__, count, stride, (void *)out, (void *)data);
151
152 if (stride == 16)
153 COPY_DWORDS(out, data, count * 4);
154 else
155 for (i = 0; i < count; i++) {
156 out[0] = *(int *)data;
157 out[1] = *(int *)(data + 4);
158 out[2] = *(int *)(data + 8);
159 out[3] = *(int *)(data + 12);
160 out += 4;
161 data += stride;
162 }
163 }
164
165 static void r300EmitVec(GLcontext * ctx, struct r300_aos *aos,
166 GLvoid * data, int size, int stride, int count)
167 {
168 r300ContextPtr rmesa = R300_CONTEXT(ctx);
169 uint32_t *out;
170 uint32_t bo_size;
171
172 memset(aos, 0, sizeof(struct r300_aos));
173 if (stride == 0) {
174 bo_size = size * 4;
175 count = 1;
176 aos->stride = 0;
177 } else {
178 bo_size = size * count * 4;
179 aos->stride = size;
180 }
181 aos->bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
182 0, bo_size, 32, RADEON_GEM_DOMAIN_GTT, 0);
183 aos->offset = 0;
184 aos->components = size;
185 aos->count = count;
186
187 radeon_bo_map(aos->bo, 1);
188 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
189 switch (size) {
190 case 1: r300EmitVec4(out, data, stride, count); break;
191 case 2: r300EmitVec8(out, data, stride, count); break;
192 case 3: r300EmitVec12(out, data, stride, count); break;
193 case 4: r300EmitVec16(out, data, stride, count); break;
194 default:
195 assert(0);
196 break;
197 }
198 radeon_bo_unmap(aos->bo);
199 }
200
201 #define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \
202 (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT)
203
204 GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
205 int *inputs, GLint * tab, GLuint nr)
206 {
207 GLuint i, dw;
208
209 /* type, inputs, stop bit, size */
210 for (i = 0; i < nr; i += 2) {
211 /* make sure input is valid, would lockup the gpu */
212 assert(inputs[tab[i]] != -1);
213 dw = (R300_SIGNED | DW_SIZE(i));
214 if (i + 1 == nr) {
215 dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
216 } else {
217 assert(inputs[tab[i + 1]] != -1);
218 dw |= (R300_SIGNED |
219 DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT;
220 if (i + 2 == nr) {
221 dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
222 }
223 }
224 dst[i >> 1] = dw;
225 }
226
227 return (nr + 1) >> 1;
228 }
229
230 static GLuint r300VAPInputRoute1Swizzle(int swizzle[4])
231 {
232 return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |
233 (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
234 (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
235 (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT);
236 }
237
238 GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
239 {
240 GLuint i, dw;
241
242 for (i = 0; i < nr; i += 2) {
243 dw = (r300VAPInputRoute1Swizzle(swizzle[i]) |
244 ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
245 R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT;
246 if (i + 1 < nr) {
247 dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) |
248 ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
249 R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
250 }
251 dst[i >> 1] = dw;
252 }
253
254 return (nr + 1) >> 1;
255 }
256
257 GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
258 {
259 /* No idea what this value means. I have seen other values written to
260 * this register... */
261 return 0x5555;
262 }
263
264 GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
265 {
266 r300ContextPtr rmesa = R300_CONTEXT(ctx);
267 GLuint i, vic_1 = 0;
268
269 if (InputsRead & (1 << VERT_ATTRIB_POS))
270 vic_1 |= R300_INPUT_CNTL_POS;
271
272 if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
273 vic_1 |= R300_INPUT_CNTL_NORMAL;
274
275 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
276 vic_1 |= R300_INPUT_CNTL_COLOR;
277
278 rmesa->state.texture.tc_count = 0;
279 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
280 if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
281 rmesa->state.texture.tc_count++;
282 vic_1 |= R300_INPUT_CNTL_TC0 << i;
283 }
284
285 return vic_1;
286 }
287
288 GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
289 {
290 GLuint ret = 0;
291
292 if (OutputsWritten & (1 << VERT_RESULT_HPOS))
293 ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
294
295 if (OutputsWritten & (1 << VERT_RESULT_COL0))
296 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
297
298 if (OutputsWritten & (1 << VERT_RESULT_COL1))
299 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
300
301 if (OutputsWritten & (1 << VERT_RESULT_BFC0)
302 || OutputsWritten & (1 << VERT_RESULT_BFC1))
303 ret |=
304 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
305 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
306 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
307
308 if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
309 ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
310
311 return ret;
312 }
313
314 GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
315 {
316 GLuint i, ret = 0;
317
318 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
319 if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
320 ret |= (4 << (3 * i));
321 }
322 }
323
324 return ret;
325 }
326
327 /* Emit vertex data to GART memory
328 * Route inputs to the vertex processor
329 * This function should never return R300_FALLBACK_TCL when using software tcl.
330 */
331 int r300EmitArrays(GLcontext * ctx)
332 {
333 r300ContextPtr rmesa = R300_CONTEXT(ctx);
334 TNLcontext *tnl = TNL_CONTEXT(ctx);
335 struct vertex_buffer *vb = &tnl->vb;
336 GLuint nr;
337 GLuint count = vb->Count;
338 GLuint i;
339 GLuint InputsRead = 0, OutputsWritten = 0;
340 int *inputs = NULL;
341 int vir_inputs[VERT_ATTRIB_MAX];
342 GLint tab[VERT_ATTRIB_MAX];
343 int swizzle[VERT_ATTRIB_MAX][4];
344 struct r300_vertex_program *prog =
345 (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
346
347 if (hw_tcl_on) {
348 inputs = prog->inputs;
349 InputsRead = prog->key.InputsRead;
350 OutputsWritten = prog->key.OutputsWritten;
351 } else {
352 inputs = rmesa->state.sw_tcl_inputs;
353
354 DECLARE_RENDERINPUTS(render_inputs_bitset);
355 RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
356
357 vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr;
358
359 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
360 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
361
362 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
363 InputsRead |= 1 << VERT_ATTRIB_POS;
364 OutputsWritten |= 1 << VERT_RESULT_HPOS;
365 }
366
367 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
368 InputsRead |= 1 << VERT_ATTRIB_COLOR0;
369 OutputsWritten |= 1 << VERT_RESULT_COL0;
370 }
371
372 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) {
373 InputsRead |= 1 << VERT_ATTRIB_COLOR1;
374 OutputsWritten |= 1 << VERT_RESULT_COL1;
375 }
376
377 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
378 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) {
379 InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
380 OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
381 }
382 }
383
384 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
385 if (InputsRead & (1 << i)) {
386 inputs[i] = nr++;
387 } else {
388 inputs[i] = -1;
389 }
390 }
391
392 /* Fixed, apply to vir0 only */
393 memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int));
394 inputs = vir_inputs;
395 if (InputsRead & VERT_ATTRIB_POS)
396 inputs[VERT_ATTRIB_POS] = 0;
397 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
398 inputs[VERT_ATTRIB_COLOR0] = 2;
399 if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
400 inputs[VERT_ATTRIB_COLOR1] = 3;
401 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
402 if (InputsRead & (1 << i))
403 inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
404
405 RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
406 }
407
408 assert(InputsRead);
409 assert(OutputsWritten);
410
411 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
412 if (InputsRead & (1 << i)) {
413 tab[nr++] = i;
414 }
415 }
416
417 if (nr > R300_MAX_AOS_ARRAYS) {
418 return R300_FALLBACK_TCL;
419 }
420
421 for (i = 0; i < nr; i++) {
422 int ci;
423
424 swizzle[i][0] = SWIZZLE_ZERO;
425 swizzle[i][1] = SWIZZLE_ZERO;
426 swizzle[i][2] = SWIZZLE_ZERO;
427 swizzle[i][3] = SWIZZLE_ONE;
428
429 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
430 swizzle[i][ci] = ci;
431 }
432 r300EmitVec(ctx, &rmesa->state.aos[i],
433 vb->AttribPtr[tab[i]]->data,
434 vb->AttribPtr[tab[i]]->size,
435 vb->AttribPtr[tab[i]]->stride, count);
436 }
437
438 /* Setup INPUT_ROUTE. */
439 if (rmesa->radeon.radeonScreen->kernel_mm) {
440 R300_STATECHANGE(rmesa, vir[0]);
441 rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
442 rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
443 rmesa->hw.vir[0].cmd[0] |=
444 (r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
445 vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16;
446 R300_STATECHANGE(rmesa, vir[1]);
447 rmesa->hw.vir[1].cmd[0] |=
448 (r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
449 nr) & 0x3FFF) << 16;
450 } else {
451 R300_STATECHANGE(rmesa, vir[0]);
452 ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
453 r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
454 vb->AttribPtr, inputs, tab, nr);
455 R300_STATECHANGE(rmesa, vir[1]);
456 ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
457 r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
458 nr);
459 }
460
461 /* Setup INPUT_CNTL. */
462 R300_STATECHANGE(rmesa, vic);
463 rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
464 rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
465
466 /* Setup OUTPUT_VTX_FMT. */
467 R300_STATECHANGE(rmesa, vof);
468 rmesa->hw.vof.cmd[R300_VOF_CNTL_0] =
469 r300VAPOutputCntl0(ctx, OutputsWritten);
470 rmesa->hw.vof.cmd[R300_VOF_CNTL_1] =
471 r300VAPOutputCntl1(ctx, OutputsWritten);
472
473 rmesa->state.aos_count = nr;
474
475 return R300_FALLBACK_NONE;
476 }
477
478 void r300ReleaseArrays(GLcontext * ctx)
479 {
480 r300ContextPtr rmesa = R300_CONTEXT(ctx);
481 int i;
482
483 if (rmesa->state.elt_dma_bo) {
484 radeon_bo_unref(rmesa->state.elt_dma_bo);
485 rmesa->state.elt_dma_bo = 0;
486 }
487 for (i = 0; i < rmesa->state.aos_count; i++) {
488 if (rmesa->state.aos[i].bo) {
489 rmesa->state.aos[i].bo = radeon_bo_unref(rmesa->state.aos[i].bo);
490 }
491 }
492 }
493
494 void r300EmitCacheFlush(r300ContextPtr rmesa)
495 {
496 BATCH_LOCALS(rmesa);
497
498 BEGIN_BATCH(4);
499 OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
500 R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
501 R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
502 OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
503 R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
504 R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
505 END_BATCH();
506 COMMIT_BATCH();
507 }