r300: Simplify r300VAPInputRoute1.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_emit.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * \file
32 *
33 * \author Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "mtypes.h"
38 #include "colormac.h"
39 #include "imports.h"
40 #include "macros.h"
41 #include "image.h"
42
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
45 #include "tnl/tnl.h"
46 #include "tnl/t_context.h"
47
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
53
54 #ifdef USER_BUFFERS
55 #include "r300_mem.h"
56 #endif
57
58 #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
59 SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
60 SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
61 SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
62 SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
63 SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
64 #error Cannot change these!
65 #endif
66
67 #define DEBUG_ALL DEBUG_VERTS
68
/*
 * COPY_DWORDS(dst, src, nr)
 *
 * Copy `nr` 32-bit words from `src` to `dst` and advance `dst` past the
 * words written.  `dst` must be a modifiable lvalue of type int * (or a
 * compatible dword pointer); `src` is left untouched.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
/*
 * Portable fallback.  Arguments are parenthesized and `src`/`nr` are
 * evaluated exactly once, so expressions such as `p + 4` or `n * 2` behave
 * as the caller expects.  The temporaries carry a trailing underscore so
 * they cannot capture a caller variable that appears in the arguments.
 */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	const int *copy_src_ = (const int *)(src);			\
	const int copy_nr_ = (nr);					\
	int copy_i_;							\
	for ( copy_i_ = 0 ; copy_i_ < copy_nr_ ; copy_i_++ )		\
		(dst)[copy_i_] = copy_src_[copy_i_];			\
	(dst) += copy_nr_;						\
} while (0)
#endif
88
89 static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
90 GLvoid * data, int stride, int count)
91 {
92 int i;
93 int *out = (int *)(rvb->address + rvb->start);
94
95 if (RADEON_DEBUG & DEBUG_VERTS)
96 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
97 __FUNCTION__, count, stride, (void *)out, (void *)data);
98
99 if (stride == 4)
100 COPY_DWORDS(out, data, count);
101 else
102 for (i = 0; i < count; i++) {
103 out[0] = *(int *)data;
104 out++;
105 data += stride;
106 }
107 }
108
109 static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
110 GLvoid * data, int stride, int count)
111 {
112 int i;
113 int *out = (int *)(rvb->address + rvb->start);
114
115 if (RADEON_DEBUG & DEBUG_VERTS)
116 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
117 __FUNCTION__, count, stride, (void *)out, (void *)data);
118
119 if (stride == 8)
120 COPY_DWORDS(out, data, count * 2);
121 else
122 for (i = 0; i < count; i++) {
123 out[0] = *(int *)data;
124 out[1] = *(int *)(data + 4);
125 out += 2;
126 data += stride;
127 }
128 }
129
130 static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
131 GLvoid * data, int stride, int count)
132 {
133 int i;
134 int *out = (int *)(rvb->address + rvb->start);
135
136 if (RADEON_DEBUG & DEBUG_VERTS)
137 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
138 __FUNCTION__, count, stride, (void *)out, (void *)data);
139
140 if (stride == 12)
141 COPY_DWORDS(out, data, count * 3);
142 else
143 for (i = 0; i < count; i++) {
144 out[0] = *(int *)data;
145 out[1] = *(int *)(data + 4);
146 out[2] = *(int *)(data + 8);
147 out += 3;
148 data += stride;
149 }
150 }
151
152 static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
153 GLvoid * data, int stride, int count)
154 {
155 int i;
156 int *out = (int *)(rvb->address + rvb->start);
157
158 if (RADEON_DEBUG & DEBUG_VERTS)
159 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
160 __FUNCTION__, count, stride, (void *)out, (void *)data);
161
162 if (stride == 16)
163 COPY_DWORDS(out, data, count * 4);
164 else
165 for (i = 0; i < count; i++) {
166 out[0] = *(int *)data;
167 out[1] = *(int *)(data + 4);
168 out[2] = *(int *)(data + 8);
169 out[3] = *(int *)(data + 12);
170 out += 4;
171 data += stride;
172 }
173 }
174
175 static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
176 GLvoid * data, int size, int stride, int count)
177 {
178 r300ContextPtr rmesa = R300_CONTEXT(ctx);
179
180 if (stride == 0) {
181 r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
182 count = 1;
183 rvb->aos_offset = GET_START(rvb);
184 rvb->aos_stride = 0;
185 } else {
186 r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
187 rvb->aos_offset = GET_START(rvb);
188 rvb->aos_stride = size;
189 }
190
191 switch (size) {
192 case 1:
193 r300EmitVec4(ctx, rvb, data, stride, count);
194 break;
195 case 2:
196 r300EmitVec8(ctx, rvb, data, stride, count);
197 break;
198 case 3:
199 r300EmitVec12(ctx, rvb, data, stride, count);
200 break;
201 case 4:
202 r300EmitVec16(ctx, rvb, data, stride, count);
203 break;
204 default:
205 assert(0);
206 break;
207 }
208 }
209
210 static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
211 int *inputs, GLint * tab, GLuint nr)
212 {
213 GLushort i, w;
214 uint16_t * dst16 = (uint16_t *) dst;
215
216 /* type, inputs, stop bit, size */
217 for (i = 0; i < nr; i++) {
218 /* make sure input is valid, would lockup the gpu */
219 assert(inputs[tab[i]] != -1);
220 w = R300_INPUT_ROUTE_FLOAT | (inputs[tab[i]] << 8) | (attribptr[tab[i]]->size - 1);
221 if (i + 1 == nr) {
222 w |= R300_VAP_INPUT_ROUTE_END;
223 }
224 dst16[i] = w;
225 }
226
227 return (nr + 1) >> 1;
228 }
229
230 static GLuint r300VAPInputRoute1Swizzle(int swizzle[4])
231 {
232 return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
233 (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
234 (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
235 (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
236 }
237
238 GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
239 {
240 GLuint i;
241 uint16_t * dst16 = (uint16_t *) dst;
242
243 for (i = 0; i < nr; i++) {
244 dst16[i] = r300VAPInputRoute1Swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
245 }
246
247 return (nr + 1) >> 1;
248 }
249
250 GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
251 {
252 /* No idea what this value means. I have seen other values written to
253 * this register... */
254 return 0x5555;
255 }
256
257 GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
258 {
259 r300ContextPtr rmesa = R300_CONTEXT(ctx);
260 GLuint i, vic_1 = 0;
261
262 if (InputsRead & (1 << VERT_ATTRIB_POS))
263 vic_1 |= R300_INPUT_CNTL_POS;
264
265 if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
266 vic_1 |= R300_INPUT_CNTL_NORMAL;
267
268 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
269 vic_1 |= R300_INPUT_CNTL_COLOR;
270
271 rmesa->state.texture.tc_count = 0;
272 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
273 if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
274 rmesa->state.texture.tc_count++;
275 vic_1 |= R300_INPUT_CNTL_TC0 << i;
276 }
277
278 return vic_1;
279 }
280
281 GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
282 {
283 GLuint ret = 0;
284
285 if (OutputsWritten & (1 << VERT_RESULT_HPOS))
286 ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
287
288 if (OutputsWritten & (1 << VERT_RESULT_COL0))
289 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
290
291 if (OutputsWritten & (1 << VERT_RESULT_COL1))
292 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
293
294 if (OutputsWritten & (1 << VERT_RESULT_BFC0)
295 || OutputsWritten & (1 << VERT_RESULT_BFC1))
296 ret |=
297 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
298 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
299 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
300
301 #if 0
302 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
303 #endif
304
305 if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
306 ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
307
308 return ret;
309 }
310
311 GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
312 {
313 GLuint i, ret = 0;
314
315 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
316 if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
317 ret |= (4 << (3 * i));
318 }
319 }
320
321 return ret;
322 }
323
324 /* Emit vertex data to GART memory
325 * Route inputs to the vertex processor
326 * This function should never return R300_FALLBACK_TCL when using software tcl.
327 */
328 int r300EmitArrays(GLcontext * ctx)
329 {
330 r300ContextPtr rmesa = R300_CONTEXT(ctx);
331 TNLcontext *tnl = TNL_CONTEXT(ctx);
332 struct vertex_buffer *vb = &tnl->vb;
333 GLuint nr;
334 GLuint count = vb->Count;
335 GLuint i;
336 GLuint InputsRead = 0, OutputsWritten = 0;
337 int *inputs = NULL;
338 int vir_inputs[VERT_ATTRIB_MAX];
339 GLint tab[VERT_ATTRIB_MAX];
340 int swizzle[VERT_ATTRIB_MAX][4];
341 struct r300_vertex_program *prog =
342 (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
343
344 if (hw_tcl_on) {
345 inputs = prog->inputs;
346 InputsRead = prog->key.InputsRead;
347 OutputsWritten = prog->key.OutputsWritten;
348 } else {
349 inputs = rmesa->state.sw_tcl_inputs;
350
351 DECLARE_RENDERINPUTS(render_inputs_bitset);
352 RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
353
354 vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr;
355
356 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
357 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
358 //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
359
360 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
361 InputsRead |= 1 << VERT_ATTRIB_POS;
362 OutputsWritten |= 1 << VERT_RESULT_HPOS;
363 }
364
365 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
366 InputsRead |= 1 << VERT_ATTRIB_COLOR0;
367 OutputsWritten |= 1 << VERT_RESULT_COL0;
368 }
369
370 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) {
371 InputsRead |= 1 << VERT_ATTRIB_COLOR1;
372 OutputsWritten |= 1 << VERT_RESULT_COL1;
373 }
374
375 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
376 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) {
377 InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
378 OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
379 }
380 }
381
382 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
383 if (InputsRead & (1 << i)) {
384 inputs[i] = nr++;
385 } else {
386 inputs[i] = -1;
387 }
388 }
389
390 /* Fixed, apply to vir0 only */
391 memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int));
392 inputs = vir_inputs;
393 if (InputsRead & VERT_ATTRIB_POS)
394 inputs[VERT_ATTRIB_POS] = 0;
395 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
396 inputs[VERT_ATTRIB_COLOR0] = 2;
397 if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
398 inputs[VERT_ATTRIB_COLOR1] = 3;
399 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
400 if (InputsRead & (1 << i))
401 inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
402
403 RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
404 }
405
406 assert(InputsRead);
407 assert(OutputsWritten);
408
409 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
410 if (InputsRead & (1 << i)) {
411 tab[nr++] = i;
412 }
413 }
414
415 if (nr > R300_MAX_AOS_ARRAYS) {
416 return R300_FALLBACK_TCL;
417 }
418
419 for (i = 0; i < nr; i++) {
420 int ci, fix, found = 0;
421
422 swizzle[i][0] = SWIZZLE_ZERO;
423 swizzle[i][1] = SWIZZLE_ZERO;
424 swizzle[i][2] = SWIZZLE_ZERO;
425 swizzle[i][3] = SWIZZLE_ONE;
426
427 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
428 swizzle[i][ci] = ci;
429 }
430
431 if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
432 if (vb->AttribPtr[tab[i]]->stride % 4) {
433 return R300_FALLBACK_TCL;
434 }
435 rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
436 rmesa->state.aos[i].start = 0;
437 rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
438 rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
439 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
440 } else {
441 r300EmitVec(ctx, &rmesa->state.aos[i],
442 vb->AttribPtr[tab[i]]->data,
443 vb->AttribPtr[tab[i]]->size,
444 vb->AttribPtr[tab[i]]->stride, count);
445 }
446
447 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
448
449 for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
450 if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
451 continue;
452 }
453 found = 1;
454 break;
455 }
456
457 if (found) {
458 if (fix > 0) {
459 WARN_ONCE("Feeling lucky?\n");
460 }
461 rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
462 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
463 swizzle[i][ci] += fix;
464 }
465 } else {
466 WARN_ONCE
467 ("Cannot handle offset %x with stride %d, comp %d\n",
468 rmesa->state.aos[i].aos_offset,
469 rmesa->state.aos[i].aos_stride,
470 vb->AttribPtr[tab[i]]->size);
471 return R300_FALLBACK_TCL;
472 }
473 }
474
475 /* Setup INPUT_ROUTE. */
476 R300_STATECHANGE(rmesa, vir[0]);
477 ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
478 r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
479 vb->AttribPtr, inputs, tab, nr);
480 R300_STATECHANGE(rmesa, vir[1]);
481 ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
482 r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
483 nr);
484
485 /* Setup INPUT_CNTL. */
486 R300_STATECHANGE(rmesa, vic);
487 rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
488 rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
489
490 /* Setup OUTPUT_VTX_FMT. */
491 R300_STATECHANGE(rmesa, vof);
492 rmesa->hw.vof.cmd[R300_VOF_CNTL_0] =
493 r300VAPOutputCntl0(ctx, OutputsWritten);
494 rmesa->hw.vof.cmd[R300_VOF_CNTL_1] =
495 r300VAPOutputCntl1(ctx, OutputsWritten);
496
497 rmesa->state.aos_count = nr;
498
499 return R300_FALLBACK_NONE;
500 }
501
502 #ifdef USER_BUFFERS
503 void r300UseArrays(GLcontext * ctx)
504 {
505 r300ContextPtr rmesa = R300_CONTEXT(ctx);
506 int i;
507
508 if (rmesa->state.elt_dma.buf)
509 r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
510
511 for (i = 0; i < rmesa->state.aos_count; i++) {
512 if (rmesa->state.aos[i].buf)
513 r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
514 }
515 }
516 #endif
517
518 void r300ReleaseArrays(GLcontext * ctx)
519 {
520 r300ContextPtr rmesa = R300_CONTEXT(ctx);
521 int i;
522
523 r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
524 for (i = 0; i < rmesa->state.aos_count; i++) {
525 r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
526 }
527 }
528
529 void r300EmitCacheFlush(r300ContextPtr rmesa)
530 {
531 int cmd_reserved = 0;
532 int cmd_written = 0;
533
534 drm_radeon_cmd_header_t *cmd = NULL;
535
536 reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
537 e32(RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
538 RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
539
540 reg_start(ZB_ZCACHE_CTLSTAT, 0);
541 e32(ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
542 ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
543 }