slang: initialize the context
[mesa.git] / src / mesa / drivers / dri / r300 / r300_emit.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * \file
32 *
33 * \author Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "main/glheader.h"
37 #include "main/mtypes.h"
38 #include "main/colormac.h"
39 #include "main/imports.h"
40 #include "main/macros.h"
41 #include "main/image.h"
42
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
45 #include "tnl/tnl.h"
46 #include "tnl/t_context.h"
47
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
53
54 #ifdef USER_BUFFERS
55 #include "r300_mem.h"
56 #endif
57
/*
 * Compile-time guard: r300EmitArrays() fills its swizzle table with
 * SWIZZLE_ZERO / SWIZZLE_ONE and plain component indices, and
 * r300VAPInputRoute1Swizzle() shifts those values straight into the
 * register word without translating them.  That only works while the
 * SWIZZLE_* values are numerically equal to the matching
 * R300_INPUT_ROUTE_SELECT_* values, so refuse to build if they ever differ.
 */
#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
    SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
    SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
    SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
    SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
    SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
#error Cannot change these!
#endif

/* Alias for DEBUG_VERTS; not referenced in the visible part of this file. */
#define DEBUG_ALL DEBUG_VERTS
68
/*
 * COPY_DWORDS(dst, src, nr)
 *
 * Copy nr 32-bit words from src to dst, then leave dst pointing just past
 * the last word written.
 *
 *   dst  modifiable pointer lvalue to int-sized elements; it is advanced.
 *   src  any pointer expression; it is not modified by the portable variant.
 *   nr   number of dwords to copy.
 *
 * Every argument is parenthesized inside the expansion so that expression
 * arguments such as "base + 4" or "count * 2" keep their intended meaning.
 * The arguments are still evaluated more than once, so they must not have
 * side effects.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)(dst)),			\
			        "S" ((long)(src)) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int copy_dwords_idx;	/* unlikely to clash with caller names */ \
	for ( copy_dwords_idx = 0 ; copy_dwords_idx < (nr) ;		\
	      copy_dwords_idx++ )					\
		(dst)[copy_dwords_idx] =				\
			((int *)(src))[copy_dwords_idx];		\
	(dst) += (nr);							\
} while (0)
#endif
88
89 static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
90 GLvoid * data, int stride, int count)
91 {
92 int i;
93 int *out = (int *)(rvb->address + rvb->start);
94
95 if (RADEON_DEBUG & DEBUG_VERTS)
96 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
97 __FUNCTION__, count, stride, (void *)out, (void *)data);
98
99 if (stride == 4)
100 COPY_DWORDS(out, data, count);
101 else
102 for (i = 0; i < count; i++) {
103 out[0] = *(int *)data;
104 out++;
105 data += stride;
106 }
107 }
108
109 static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
110 GLvoid * data, int stride, int count)
111 {
112 int i;
113 int *out = (int *)(rvb->address + rvb->start);
114
115 if (RADEON_DEBUG & DEBUG_VERTS)
116 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
117 __FUNCTION__, count, stride, (void *)out, (void *)data);
118
119 if (stride == 8)
120 COPY_DWORDS(out, data, count * 2);
121 else
122 for (i = 0; i < count; i++) {
123 out[0] = *(int *)data;
124 out[1] = *(int *)(data + 4);
125 out += 2;
126 data += stride;
127 }
128 }
129
130 static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
131 GLvoid * data, int stride, int count)
132 {
133 int i;
134 int *out = (int *)(rvb->address + rvb->start);
135
136 if (RADEON_DEBUG & DEBUG_VERTS)
137 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
138 __FUNCTION__, count, stride, (void *)out, (void *)data);
139
140 if (stride == 12)
141 COPY_DWORDS(out, data, count * 3);
142 else
143 for (i = 0; i < count; i++) {
144 out[0] = *(int *)data;
145 out[1] = *(int *)(data + 4);
146 out[2] = *(int *)(data + 8);
147 out += 3;
148 data += stride;
149 }
150 }
151
152 static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
153 GLvoid * data, int stride, int count)
154 {
155 int i;
156 int *out = (int *)(rvb->address + rvb->start);
157
158 if (RADEON_DEBUG & DEBUG_VERTS)
159 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
160 __FUNCTION__, count, stride, (void *)out, (void *)data);
161
162 if (stride == 16)
163 COPY_DWORDS(out, data, count * 4);
164 else
165 for (i = 0; i < count; i++) {
166 out[0] = *(int *)data;
167 out[1] = *(int *)(data + 4);
168 out[2] = *(int *)(data + 8);
169 out[3] = *(int *)(data + 12);
170 out += 4;
171 data += stride;
172 }
173 }
174
175 static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
176 GLvoid * data, int size, int stride, int count)
177 {
178 r300ContextPtr rmesa = R300_CONTEXT(ctx);
179
180 if (stride == 0) {
181 r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
182 count = 1;
183 rvb->aos_offset = GET_START(rvb);
184 rvb->aos_stride = 0;
185 } else {
186 r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
187 rvb->aos_offset = GET_START(rvb);
188 rvb->aos_stride = size;
189 }
190
191 switch (size) {
192 case 1:
193 r300EmitVec4(ctx, rvb, data, stride, count);
194 break;
195 case 2:
196 r300EmitVec8(ctx, rvb, data, stride, count);
197 break;
198 case 3:
199 r300EmitVec12(ctx, rvb, data, stride, count);
200 break;
201 case 4:
202 r300EmitVec16(ctx, rvb, data, stride, count);
203 break;
204 default:
205 assert(0);
206 break;
207 }
208 }
209
210 #define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \
211 (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT)
212
213 GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
214 int *inputs, GLint * tab, GLuint nr)
215 {
216 GLuint i, dw;
217
218 /* type, inputs, stop bit, size */
219 for (i = 0; i < nr; i += 2) {
220 /* make sure input is valid, would lockup the gpu */
221 assert(inputs[tab[i]] != -1);
222 dw = (R300_SIGNED | DW_SIZE(i));
223 if (i + 1 == nr) {
224 dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
225 } else {
226 assert(inputs[tab[i + 1]] != -1);
227 dw |= (R300_SIGNED |
228 DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT;
229 if (i + 2 == nr) {
230 dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
231 }
232 }
233 dst[i >> 1] = dw;
234 }
235
236 return (nr + 1) >> 1;
237 }
238
239 static GLuint r300VAPInputRoute1Swizzle(int swizzle[4])
240 {
241 return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |
242 (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
243 (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
244 (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT);
245 }
246
247 GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
248 {
249 GLuint i, dw;
250
251 for (i = 0; i < nr; i += 2) {
252 dw = (r300VAPInputRoute1Swizzle(swizzle[i]) |
253 ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
254 R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT;
255 if (i + 1 < nr) {
256 dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) |
257 ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
258 R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
259 }
260 dst[i >> 1] = dw;
261 }
262
263 return (nr + 1) >> 1;
264 }
265
266 GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
267 {
268 /* No idea what this value means. I have seen other values written to
269 * this register... */
270 return 0x5555;
271 }
272
273 GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
274 {
275 r300ContextPtr rmesa = R300_CONTEXT(ctx);
276 GLuint i, vic_1 = 0;
277
278 if (InputsRead & (1 << VERT_ATTRIB_POS))
279 vic_1 |= R300_INPUT_CNTL_POS;
280
281 if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
282 vic_1 |= R300_INPUT_CNTL_NORMAL;
283
284 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
285 vic_1 |= R300_INPUT_CNTL_COLOR;
286
287 rmesa->state.texture.tc_count = 0;
288 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
289 if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
290 rmesa->state.texture.tc_count++;
291 vic_1 |= R300_INPUT_CNTL_TC0 << i;
292 }
293
294 return vic_1;
295 }
296
297 GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
298 {
299 GLuint ret = 0;
300
301 if (OutputsWritten & (1 << VERT_RESULT_HPOS))
302 ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
303
304 if (OutputsWritten & (1 << VERT_RESULT_COL0))
305 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
306
307 if (OutputsWritten & (1 << VERT_RESULT_COL1))
308 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
309
310 if (OutputsWritten & (1 << VERT_RESULT_BFC0)
311 || OutputsWritten & (1 << VERT_RESULT_BFC1))
312 ret |=
313 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
314 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
315 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
316
317 if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
318 ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
319
320 return ret;
321 }
322
323 GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
324 {
325 GLuint i, ret = 0, first_free_texcoord = 0;
326
327 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
328 if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
329 ret |= (4 << (3 * i));
330 ++first_free_texcoord;
331 }
332 }
333
334 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
335 if (first_free_texcoord > 8) {
336 fprintf(stderr, "\tout of free texcoords to write fog coord\n");
337 _mesa_exit(-1);
338 }
339 ret |= 4 << (3 * first_free_texcoord);
340 }
341
342 return ret;
343 }
344
345 /* Emit vertex data to GART memory
346 * Route inputs to the vertex processor
347 * This function should never return R300_FALLBACK_TCL when using software tcl.
348 */
349 int r300EmitArrays(GLcontext * ctx)
350 {
351 r300ContextPtr rmesa = R300_CONTEXT(ctx);
352 TNLcontext *tnl = TNL_CONTEXT(ctx);
353 struct vertex_buffer *vb = &tnl->vb;
354 GLuint nr;
355 GLuint count = vb->Count;
356 GLuint i;
357 GLuint InputsRead = 0, OutputsWritten = 0;
358 int *inputs = NULL;
359 int vir_inputs[VERT_ATTRIB_MAX];
360 GLint tab[VERT_ATTRIB_MAX];
361 int swizzle[VERT_ATTRIB_MAX][4];
362 struct r300_vertex_program *prog =
363 (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
364
365 if (hw_tcl_on) {
366 inputs = prog->inputs;
367 InputsRead = prog->key.InputsRead;
368 OutputsWritten = prog->key.OutputsWritten;
369 } else {
370 inputs = rmesa->state.sw_tcl_inputs;
371
372 DECLARE_RENDERINPUTS(render_inputs_bitset);
373 RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
374
375 vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr;
376
377 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
378 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
379 //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
380
381 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
382 InputsRead |= 1 << VERT_ATTRIB_POS;
383 OutputsWritten |= 1 << VERT_RESULT_HPOS;
384 }
385
386 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
387 InputsRead |= 1 << VERT_ATTRIB_COLOR0;
388 OutputsWritten |= 1 << VERT_RESULT_COL0;
389 }
390
391 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) {
392 InputsRead |= 1 << VERT_ATTRIB_COLOR1;
393 OutputsWritten |= 1 << VERT_RESULT_COL1;
394 }
395
396 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
397 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) {
398 InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
399 OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
400 }
401 }
402
403 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
404 if (InputsRead & (1 << i)) {
405 inputs[i] = nr++;
406 } else {
407 inputs[i] = -1;
408 }
409 }
410
411 /* Fixed, apply to vir0 only */
412 memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int));
413 inputs = vir_inputs;
414 if (InputsRead & VERT_ATTRIB_POS)
415 inputs[VERT_ATTRIB_POS] = 0;
416 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
417 inputs[VERT_ATTRIB_COLOR0] = 2;
418 if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
419 inputs[VERT_ATTRIB_COLOR1] = 3;
420 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
421 if (InputsRead & (1 << i))
422 inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
423
424 RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
425 }
426
427 assert(InputsRead);
428 assert(OutputsWritten);
429
430 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
431 if (InputsRead & (1 << i)) {
432 tab[nr++] = i;
433 }
434 }
435
436 if (nr > R300_MAX_AOS_ARRAYS) {
437 return R300_FALLBACK_TCL;
438 }
439
440 for (i = 0; i < nr; i++) {
441 int ci, fix, found = 0;
442
443 swizzle[i][0] = SWIZZLE_ZERO;
444 swizzle[i][1] = SWIZZLE_ZERO;
445 swizzle[i][2] = SWIZZLE_ZERO;
446 swizzle[i][3] = SWIZZLE_ONE;
447
448 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
449 swizzle[i][ci] = ci;
450 }
451
452 if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
453 if (vb->AttribPtr[tab[i]]->stride % 4) {
454 return R300_FALLBACK_TCL;
455 }
456 rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
457 rmesa->state.aos[i].start = 0;
458 rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
459 rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
460 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
461 } else {
462 r300EmitVec(ctx, &rmesa->state.aos[i],
463 vb->AttribPtr[tab[i]]->data,
464 vb->AttribPtr[tab[i]]->size,
465 vb->AttribPtr[tab[i]]->stride, count);
466 }
467
468 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
469
470 for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
471 if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
472 continue;
473 }
474 found = 1;
475 break;
476 }
477
478 if (found) {
479 if (fix > 0) {
480 WARN_ONCE("Feeling lucky?\n");
481 }
482 rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
483 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
484 swizzle[i][ci] += fix;
485 }
486 } else {
487 WARN_ONCE
488 ("Cannot handle offset %x with stride %d, comp %d\n",
489 rmesa->state.aos[i].aos_offset,
490 rmesa->state.aos[i].aos_stride,
491 vb->AttribPtr[tab[i]]->size);
492 return R300_FALLBACK_TCL;
493 }
494 }
495
496 /* Setup INPUT_ROUTE. */
497 R300_STATECHANGE(rmesa, vir[0]);
498 ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
499 r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
500 vb->AttribPtr, inputs, tab, nr);
501 R300_STATECHANGE(rmesa, vir[1]);
502 ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
503 r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
504 nr);
505
506 /* Setup INPUT_CNTL. */
507 R300_STATECHANGE(rmesa, vic);
508 rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
509 rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
510
511 /* Setup OUTPUT_VTX_FMT. */
512 R300_STATECHANGE(rmesa, vof);
513 rmesa->hw.vof.cmd[R300_VOF_CNTL_0] =
514 r300VAPOutputCntl0(ctx, OutputsWritten);
515 rmesa->hw.vof.cmd[R300_VOF_CNTL_1] =
516 r300VAPOutputCntl1(ctx, OutputsWritten);
517
518 rmesa->state.aos_count = nr;
519
520 return R300_FALLBACK_NONE;
521 }
522
#ifdef USER_BUFFERS
/**
 * Call r300_mem_use() for the element DMA buffer and for every emitted
 * array region that has a buffer attached.
 */
void r300UseArrays(GLcontext * ctx)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int idx = 0;

	if (rmesa->state.elt_dma.buf) {
		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
	}

	while (idx < rmesa->state.aos_count) {
		struct r300_dma_region *region = &rmesa->state.aos[idx];

		if (region->buf) {
			r300_mem_use(rmesa, region->buf->id);
		}
		idx++;
	}
}
#endif
538
539 void r300ReleaseArrays(GLcontext * ctx)
540 {
541 r300ContextPtr rmesa = R300_CONTEXT(ctx);
542 int i;
543
544 r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
545 for (i = 0; i < rmesa->state.aos_count; i++) {
546 r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
547 }
548 }
549
/**
 * Emit two register writes: one to R300_RB3D_DSTCACHE_CTLSTAT with its
 * free-3D-tags and flush-dirty-3D flags, and one to R300_ZB_ZCACHE_CTLSTAT
 * with its flush-and-free and free flags.
 *
 * \param rmesa r300 context; not referenced directly in this body.
 */
void r300EmitCacheFlush(r300ContextPtr rmesa)
{
	/* These locals are never read directly below.  Presumably the
	 * reg_start()/e32() helpers are macros that use them (and rmesa) by
	 * name, so they must keep these exact names -- TODO confirm against
	 * the header that defines them. */
	int cmd_reserved = 0;
	int cmd_written = 0;

	drm_radeon_cmd_header_t *cmd = NULL;

	/* Destination (color) cache control/status register. */
	reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
	e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
	    R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);

	/* Z cache control/status register. */
	reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
	e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
	    R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
}