r300: cleanup some of the swtcl code
[mesa.git] / src / mesa / drivers / dri / r300 / r300_emit.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * \file
32 *
33 * \author Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "mtypes.h"
38 #include "colormac.h"
39 #include "imports.h"
40 #include "macros.h"
41 #include "image.h"
42
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
45 #include "tnl/tnl.h"
46 #include "tnl/t_context.h"
47
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
53
54 #ifdef USER_BUFFERS
55 #include "r300_mem.h"
56 #endif
57
/*
 * Compile-time guard: the swizzle tables built in this file are filled with
 * Mesa SWIZZLE_* values and then shifted directly into the INPUT_ROUTE
 * register fields, so each SWIZZLE_* constant has to be numerically equal
 * to its R300_INPUT_ROUTE_SELECT_* counterpart.
 */
#if !(SWIZZLE_X == R300_INPUT_ROUTE_SELECT_X && \
      SWIZZLE_Y == R300_INPUT_ROUTE_SELECT_Y && \
      SWIZZLE_Z == R300_INPUT_ROUTE_SELECT_Z && \
      SWIZZLE_W == R300_INPUT_ROUTE_SELECT_W && \
      SWIZZLE_ZERO == R300_INPUT_ROUTE_SELECT_ZERO && \
      SWIZZLE_ONE == R300_INPUT_ROUTE_SELECT_ONE)
#error Cannot change these!
#endif

/* Alias for the vertex debug flag; not referenced in the visible part of this file. */
#define DEBUG_ALL DEBUG_VERTS
68
/*
 * COPY_DWORDS(dst, src, nr)
 *
 * Copy nr int-sized words from src to dst.  dst must be a modifiable
 * pointer lvalue: the portable variant advances it by nr elements, and the
 * x86 variant binds it to the destination register as an asm output.
 * src is not modified.
 *
 * Every argument is parenthesized in the expansion so that compound
 * expressions (e.g. `count * 2` or `a ? b : c`) are safe to pass.  The
 * portable variant still evaluates nr more than once, so do not pass
 * expressions with side effects.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)(dst)),			\
			        "S" ((long)(src)) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int j;								\
	for ( j = 0 ; j < (nr) ; j++ )					\
		(dst)[j] = ((int *)(src))[j];				\
	(dst) += (nr);							\
} while (0)
#endif
88
89 static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
90 GLvoid * data, int stride, int count)
91 {
92 int i;
93 int *out = (int *)(rvb->address + rvb->start);
94
95 if (RADEON_DEBUG & DEBUG_VERTS)
96 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
97 __FUNCTION__, count, stride, (void *)out, (void *)data);
98
99 if (stride == 4)
100 COPY_DWORDS(out, data, count);
101 else
102 for (i = 0; i < count; i++) {
103 out[0] = *(int *)data;
104 out++;
105 data += stride;
106 }
107 }
108
109 static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
110 GLvoid * data, int stride, int count)
111 {
112 int i;
113 int *out = (int *)(rvb->address + rvb->start);
114
115 if (RADEON_DEBUG & DEBUG_VERTS)
116 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
117 __FUNCTION__, count, stride, (void *)out, (void *)data);
118
119 if (stride == 8)
120 COPY_DWORDS(out, data, count * 2);
121 else
122 for (i = 0; i < count; i++) {
123 out[0] = *(int *)data;
124 out[1] = *(int *)(data + 4);
125 out += 2;
126 data += stride;
127 }
128 }
129
130 static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
131 GLvoid * data, int stride, int count)
132 {
133 int i;
134 int *out = (int *)(rvb->address + rvb->start);
135
136 if (RADEON_DEBUG & DEBUG_VERTS)
137 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
138 __FUNCTION__, count, stride, (void *)out, (void *)data);
139
140 if (stride == 12)
141 COPY_DWORDS(out, data, count * 3);
142 else
143 for (i = 0; i < count; i++) {
144 out[0] = *(int *)data;
145 out[1] = *(int *)(data + 4);
146 out[2] = *(int *)(data + 8);
147 out += 3;
148 data += stride;
149 }
150 }
151
152 static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
153 GLvoid * data, int stride, int count)
154 {
155 int i;
156 int *out = (int *)(rvb->address + rvb->start);
157
158 if (RADEON_DEBUG & DEBUG_VERTS)
159 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
160 __FUNCTION__, count, stride, (void *)out, (void *)data);
161
162 if (stride == 16)
163 COPY_DWORDS(out, data, count * 4);
164 else
165 for (i = 0; i < count; i++) {
166 out[0] = *(int *)data;
167 out[1] = *(int *)(data + 4);
168 out[2] = *(int *)(data + 8);
169 out[3] = *(int *)(data + 12);
170 out += 4;
171 data += stride;
172 }
173 }
174
175 static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
176 GLvoid * data, int size, int stride, int count)
177 {
178 r300ContextPtr rmesa = R300_CONTEXT(ctx);
179
180 if (stride == 0) {
181 r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
182 count = 1;
183 rvb->aos_offset = GET_START(rvb);
184 rvb->aos_stride = 0;
185 } else {
186 r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
187 rvb->aos_offset = GET_START(rvb);
188 rvb->aos_stride = size;
189 }
190
191 switch (size) {
192 case 1:
193 r300EmitVec4(ctx, rvb, data, stride, count);
194 break;
195 case 2:
196 r300EmitVec8(ctx, rvb, data, stride, count);
197 break;
198 case 3:
199 r300EmitVec12(ctx, rvb, data, stride, count);
200 break;
201 case 4:
202 r300EmitVec16(ctx, rvb, data, stride, count);
203 break;
204 default:
205 assert(0);
206 break;
207 }
208 }
209
210 static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
211 int *inputs, GLint * tab, GLuint nr)
212 {
213 GLuint i, dw;
214
215 /* type, inputs, stop bit, size */
216 for (i = 0; i + 1 < nr; i += 2) {
217 dw = R300_INPUT_ROUTE_FLOAT | (inputs[tab[i]] << 8) | (attribptr[tab[i]]->size - 1);
218 dw |= (R300_INPUT_ROUTE_FLOAT | (inputs[tab[i + 1]] << 8) | (attribptr[tab[i + 1]]->size - 1)) << 16;
219 if (i + 2 == nr) {
220 dw |= (R300_VAP_INPUT_ROUTE_END << 16);
221 }
222 dst[i >> 1] = dw;
223 }
224
225 if (nr & 1) {
226 dw = R300_INPUT_ROUTE_FLOAT | (inputs[tab[nr - 1]] << 8) | (attribptr[tab[nr - 1]]->size - 1);
227 dw |= R300_VAP_INPUT_ROUTE_END;
228 dst[nr >> 1] = dw;
229 }
230
231 return (nr + 1) >> 1;
232 }
233
234 static GLuint r300VAPInputRoute1Swizzle(int swizzle[4])
235 {
236 return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
237 (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
238 (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
239 (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
240 }
241
242 GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
243 {
244 GLuint i;
245
246 for (i = 0; i + 1 < nr; i += 2) {
247 dst[i >> 1] = r300VAPInputRoute1Swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
248 dst[i >> 1] |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16;
249 }
250
251 if (nr & 1) {
252 dst[nr >> 1] = r300VAPInputRoute1Swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE;
253 }
254
255 return (nr + 1) >> 1;
256 }
257
258 GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
259 {
260 /* No idea what this value means. I have seen other values written to
261 * this register... */
262 return 0x5555;
263 }
264
265 GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
266 {
267 r300ContextPtr rmesa = R300_CONTEXT(ctx);
268 GLuint i, vic_1 = 0;
269
270 if (InputsRead & (1 << VERT_ATTRIB_POS))
271 vic_1 |= R300_INPUT_CNTL_POS;
272
273 if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
274 vic_1 |= R300_INPUT_CNTL_NORMAL;
275
276 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
277 vic_1 |= R300_INPUT_CNTL_COLOR;
278
279 rmesa->state.texture.tc_count = 0;
280 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
281 if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
282 rmesa->state.texture.tc_count++;
283 vic_1 |= R300_INPUT_CNTL_TC0 << i;
284 }
285
286 return vic_1;
287 }
288
289 GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
290 {
291 GLuint ret = 0;
292
293 if (OutputsWritten & (1 << VERT_RESULT_HPOS))
294 ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
295
296 if (OutputsWritten & (1 << VERT_RESULT_COL0))
297 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
298
299 if (OutputsWritten & (1 << VERT_RESULT_COL1))
300 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
301
302 #if 0
303 if (OutputsWritten & (1 << VERT_RESULT_BFC0))
304 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
305
306 if (OutputsWritten & (1 << VERT_RESULT_BFC1))
307 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
308
309 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
310 #endif
311
312 if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
313 ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
314
315 return ret;
316 }
317
318 GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
319 {
320 GLuint i, ret = 0;
321
322 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
323 if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
324 ret |= (4 << (3 * i));
325 }
326 }
327
328 return ret;
329 }
330
331 /* Emit vertex data to GART memory
332 * Route inputs to the vertex processor
333 * This function should never return R300_FALLBACK_TCL when using software tcl.
334 */
335 int r300EmitArrays(GLcontext * ctx)
336 {
337 r300ContextPtr rmesa = R300_CONTEXT(ctx);
338 TNLcontext *tnl = TNL_CONTEXT(ctx);
339 struct vertex_buffer *vb = &tnl->vb;
340 GLuint nr;
341 GLuint count = vb->Count;
342 GLuint i;
343 GLuint InputsRead = 0, OutputsWritten = 0;
344 int *inputs = NULL;
345 int vir_inputs[VERT_ATTRIB_MAX];
346 GLint tab[VERT_ATTRIB_MAX];
347 int swizzle[VERT_ATTRIB_MAX][4];
348 struct r300_vertex_program *prog =
349 (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
350
351 if (hw_tcl_on) {
352 inputs = prog->inputs;
353 InputsRead = prog->key.InputsRead;
354 OutputsWritten = prog->key.OutputsWritten;
355 } else {
356 inputs = rmesa->state.sw_tcl_inputs;
357
358 DECLARE_RENDERINPUTS(render_inputs_bitset);
359 RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
360
361 vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr;
362
363 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
364 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
365 //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
366
367 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
368 InputsRead |= 1 << VERT_ATTRIB_POS;
369 OutputsWritten |= 1 << VERT_RESULT_HPOS;
370 }
371
372 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
373 InputsRead |= 1 << VERT_ATTRIB_COLOR0;
374 OutputsWritten |= 1 << VERT_RESULT_COL0;
375 }
376
377 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) {
378 InputsRead |= 1 << VERT_ATTRIB_COLOR1;
379 OutputsWritten |= 1 << VERT_RESULT_COL1;
380 }
381
382 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
383 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) {
384 InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
385 OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
386 }
387 }
388
389 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
390 if (InputsRead & (1 << i)) {
391 inputs[i] = nr++;
392 } else {
393 inputs[i] = -1;
394 }
395 }
396
397 /* Fixed, apply to vir0 only */
398 memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int));
399 inputs = vir_inputs;
400 if (InputsRead & VERT_ATTRIB_POS)
401 inputs[VERT_ATTRIB_POS] = 0;
402 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
403 inputs[VERT_ATTRIB_COLOR0] = 2;
404 if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
405 inputs[VERT_ATTRIB_COLOR1] = 3;
406 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
407 if (InputsRead & (1 << i))
408 inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
409
410 RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
411 }
412
413 assert(InputsRead);
414 assert(OutputsWritten);
415
416 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
417 if (InputsRead & (1 << i)) {
418 tab[nr++] = i;
419 }
420 }
421
422 if (nr > R300_MAX_AOS_ARRAYS) {
423 return R300_FALLBACK_TCL;
424 }
425
426 for (i = 0; i < nr; i++) {
427 int ci, fix, found = 0;
428
429 swizzle[i][0] = SWIZZLE_ZERO;
430 swizzle[i][1] = SWIZZLE_ZERO;
431 swizzle[i][2] = SWIZZLE_ZERO;
432 swizzle[i][3] = SWIZZLE_ONE;
433
434 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
435 swizzle[i][ci] = ci;
436 }
437
438 if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
439 if (vb->AttribPtr[tab[i]]->stride % 4) {
440 return R300_FALLBACK_TCL;
441 }
442 rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
443 rmesa->state.aos[i].start = 0;
444 rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
445 rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
446 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
447 } else {
448 r300EmitVec(ctx, &rmesa->state.aos[i],
449 vb->AttribPtr[tab[i]]->data,
450 vb->AttribPtr[tab[i]]->size,
451 vb->AttribPtr[tab[i]]->stride, count);
452 }
453
454 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
455
456 for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
457 if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
458 continue;
459 }
460 found = 1;
461 break;
462 }
463
464 if (found) {
465 if (fix > 0) {
466 WARN_ONCE("Feeling lucky?\n");
467 }
468 rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
469 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
470 swizzle[i][ci] += fix;
471 }
472 } else {
473 WARN_ONCE
474 ("Cannot handle offset %x with stride %d, comp %d\n",
475 rmesa->state.aos[i].aos_offset,
476 rmesa->state.aos[i].aos_stride,
477 vb->AttribPtr[tab[i]]->size);
478 return R300_FALLBACK_TCL;
479 }
480 }
481
482 /* Setup INPUT_ROUTE. */
483 R300_STATECHANGE(rmesa, vir[0]);
484 ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
485 r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
486 vb->AttribPtr, inputs, tab, nr);
487 R300_STATECHANGE(rmesa, vir[1]);
488 ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
489 r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
490 nr);
491
492 /* Setup INPUT_CNTL. */
493 R300_STATECHANGE(rmesa, vic);
494 rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
495 rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
496
497 /* Setup OUTPUT_VTX_FMT. */
498 R300_STATECHANGE(rmesa, vof);
499 rmesa->hw.vof.cmd[R300_VOF_CNTL_0] =
500 r300VAPOutputCntl0(ctx, OutputsWritten);
501 rmesa->hw.vof.cmd[R300_VOF_CNTL_1] =
502 r300VAPOutputCntl1(ctx, OutputsWritten);
503
504 rmesa->state.aos_count = nr;
505
506 return R300_FALLBACK_NONE;
507 }
508
#ifdef USER_BUFFERS
/**
 * Call r300_mem_use() on the buffer behind the element DMA region and on
 * the buffer behind each of the first rmesa->state.aos_count vertex
 * arrays; regions without a buffer are skipped.
 */
void r300UseArrays(GLcontext * ctx)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int aos;

	if (rmesa->state.elt_dma.buf) {
		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
	}

	for (aos = 0; aos < rmesa->state.aos_count; aos++) {
		if (!rmesa->state.aos[aos].buf)
			continue;

		r300_mem_use(rmesa, rmesa->state.aos[aos].buf->id);
	}
}
#endif
524
525 void r300ReleaseArrays(GLcontext * ctx)
526 {
527 r300ContextPtr rmesa = R300_CONTEXT(ctx);
528 int i;
529
530 r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
531 for (i = 0; i < rmesa->state.aos_count; i++) {
532 r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
533 }
534 }
535
536 void r300EmitCacheFlush(r300ContextPtr rmesa)
537 {
538 int cmd_reserved = 0;
539 int cmd_written = 0;
540
541 drm_radeon_cmd_header_t *cmd = NULL;
542
543 reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
544 e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
545
546 reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
547 e32(R300_RB3D_ZCACHE_UNKNOWN_03);
548
549
550 }