r300: Use "rmesa" not "r300" in r300_emit.c; some of the macros require "rmesa".
[mesa.git] / src / mesa / drivers / dri / r300 / r300_emit.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * \file
32 *
33 * \author Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "mtypes.h"
38 #include "colormac.h"
39 #include "imports.h"
40 #include "macros.h"
41 #include "image.h"
42
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
45 #include "tnl/tnl.h"
46 #include "tnl/t_context.h"
47
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
53
54 #ifdef USER_BUFFERS
55 #include "r300_mem.h"
56 #endif
57
58 #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
59 SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
60 SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
61 SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
62 SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
63 SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
64 #error Cannot change these!
65 #endif
66
67 #define DEBUG_ALL DEBUG_VERTS
68
/*
 * COPY_DWORDS(dst, src, nr)
 *
 * Copy `nr` 32-bit words from `src` to `dst` and advance `dst` past the
 * words written.  `dst` must be a modifiable pointer lvalue; `src` is not
 * modified.
 *
 * Two implementations are provided: an x86 "rep movsl" version selected by
 * USE_X86_ASM, and a portable C loop.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
/*
 * Portable fallback.  Every macro argument is parenthesised so that
 * expressions such as `a & b` or `p + off` expand as the caller intended,
 * and `nr` is evaluated exactly once.
 */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	const int copy_nr_ = (nr);					\
	int copy_j_;							\
	for ( copy_j_ = 0 ; copy_j_ < copy_nr_ ; copy_j_++ )		\
		(dst)[copy_j_] = ((int *)(src))[copy_j_];		\
	(dst) += copy_nr_;						\
} while (0)
#endif
88
89 static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
90 GLvoid * data, int stride, int count)
91 {
92 int i;
93 int *out = (int *)(rvb->address + rvb->start);
94
95 if (RADEON_DEBUG & DEBUG_VERTS)
96 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
97 __FUNCTION__, count, stride, (void *)out, (void *)data);
98
99 if (stride == 4)
100 COPY_DWORDS(out, data, count);
101 else
102 for (i = 0; i < count; i++) {
103 out[0] = *(int *)data;
104 out++;
105 data += stride;
106 }
107 }
108
109 static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
110 GLvoid * data, int stride, int count)
111 {
112 int i;
113 int *out = (int *)(rvb->address + rvb->start);
114
115 if (RADEON_DEBUG & DEBUG_VERTS)
116 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
117 __FUNCTION__, count, stride, (void *)out, (void *)data);
118
119 if (stride == 8)
120 COPY_DWORDS(out, data, count * 2);
121 else
122 for (i = 0; i < count; i++) {
123 out[0] = *(int *)data;
124 out[1] = *(int *)(data + 4);
125 out += 2;
126 data += stride;
127 }
128 }
129
130 static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
131 GLvoid * data, int stride, int count)
132 {
133 int i;
134 int *out = (int *)(rvb->address + rvb->start);
135
136 if (RADEON_DEBUG & DEBUG_VERTS)
137 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
138 __FUNCTION__, count, stride, (void *)out, (void *)data);
139
140 if (stride == 12)
141 COPY_DWORDS(out, data, count * 3);
142 else
143 for (i = 0; i < count; i++) {
144 out[0] = *(int *)data;
145 out[1] = *(int *)(data + 4);
146 out[2] = *(int *)(data + 8);
147 out += 3;
148 data += stride;
149 }
150 }
151
152 static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
153 GLvoid * data, int stride, int count)
154 {
155 int i;
156 int *out = (int *)(rvb->address + rvb->start);
157
158 if (RADEON_DEBUG & DEBUG_VERTS)
159 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
160 __FUNCTION__, count, stride, (void *)out, (void *)data);
161
162 if (stride == 16)
163 COPY_DWORDS(out, data, count * 4);
164 else
165 for (i = 0; i < count; i++) {
166 out[0] = *(int *)data;
167 out[1] = *(int *)(data + 4);
168 out[2] = *(int *)(data + 8);
169 out[3] = *(int *)(data + 12);
170 out += 4;
171 data += stride;
172 }
173 }
174
175 static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
176 GLvoid * data, int size, int stride, int count)
177 {
178 r300ContextPtr rmesa = R300_CONTEXT(ctx);
179
180 if (stride == 0) {
181 r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
182 count = 1;
183 rvb->aos_offset = GET_START(rvb);
184 rvb->aos_stride = 0;
185 } else {
186 r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
187 rvb->aos_offset = GET_START(rvb);
188 rvb->aos_stride = size;
189 }
190
191 switch (size) {
192 case 1:
193 r300EmitVec4(ctx, rvb, data, stride, count);
194 break;
195 case 2:
196 r300EmitVec8(ctx, rvb, data, stride, count);
197 break;
198 case 3:
199 r300EmitVec12(ctx, rvb, data, stride, count);
200 break;
201 case 4:
202 r300EmitVec16(ctx, rvb, data, stride, count);
203 break;
204 default:
205 assert(0);
206 break;
207 }
208 }
209
210 static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
211 int *inputs, GLint * tab, GLuint nr)
212 {
213 GLuint i, dw;
214
215 /* dw: size, inputs, stop bit, type */
216 for (i = 0; i + 1 < nr; i += 2) {
217 dw = (attribptr[tab[i]]->size - 1) | (inputs[tab[i]] << 8) | (AOS_FORMAT_FLOAT << 14);
218 dw |= ((attribptr[tab[i + 1]]->size - 1) | (inputs[tab[i + 1]] << 8) | (AOS_FORMAT_FLOAT << 14)) << 16;
219 if (i + 2 == nr) {
220 dw |= (1 << (13 + 16));
221 }
222 dst[i >> 1] = dw;
223 }
224
225 if (nr & 1) {
226 dw = (attribptr[tab[nr - 1]]->size - 1) | (inputs[tab[nr - 1]] << 8) | (AOS_FORMAT_FLOAT << 14);
227 dw |= 1 << 13;
228 dst[nr >> 1] = dw;
229 }
230
231 return (nr + 1) >> 1;
232 }
233
234 static GLuint r300VAPInputRoute1Swizzle(int swizzle[4])
235 {
236 return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
237 (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
238 (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
239 (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
240 }
241
242 static GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
243 {
244 GLuint i;
245
246 for (i = 0; i + 1 < nr; i += 2) {
247 dst[i >> 1] = r300VAPInputRoute1Swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
248 dst[i >> 1] |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16;
249 }
250
251 if (nr & 1) {
252 dst[nr >> 1] = r300VAPInputRoute1Swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE;
253 }
254
255 return (nr + 1) >> 1;
256 }
257
258 static GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
259 {
260 /* Hard coded value, no idea what it means */
261 return 0x5555;
262 }
263
264 static GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
265 {
266 r300ContextPtr rmesa = R300_CONTEXT(ctx);
267 GLuint i, vic_1 = 0;
268
269 if (InputsRead & (1 << VERT_ATTRIB_POS))
270 vic_1 |= R300_INPUT_CNTL_POS;
271
272 if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
273 vic_1 |= R300_INPUT_CNTL_NORMAL;
274
275 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
276 vic_1 |= R300_INPUT_CNTL_COLOR;
277
278 rmesa->state.texture.tc_count = 0;
279 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
280 if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
281 rmesa->state.texture.tc_count++;
282 vic_1 |= R300_INPUT_CNTL_TC0 << i;
283 }
284
285 return vic_1;
286 }
287
288 static GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
289 {
290 GLuint ret = 0;
291
292 if (OutputsWritten & (1 << VERT_RESULT_HPOS))
293 ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
294
295 if (OutputsWritten & (1 << VERT_RESULT_COL0))
296 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
297
298 if (OutputsWritten & (1 << VERT_RESULT_COL1))
299 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
300
301 #if 0
302 if (OutputsWritten & (1 << VERT_RESULT_BFC0))
303 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
304
305 if (OutputsWritten & (1 << VERT_RESULT_BFC1))
306 ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
307
308 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
309 #endif
310
311 if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
312 ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
313
314 return ret;
315 }
316
317 static GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
318 {
319 GLuint i, ret = 0;
320
321 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
322 if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
323 ret |= (4 << (3 * i));
324 }
325 }
326
327 return ret;
328 }
329
330 /* Emit vertex data to GART memory
331 * Route inputs to the vertex processor
332 * This function should never return R300_FALLBACK_TCL when using software tcl.
333 */
334 int r300EmitArrays(GLcontext * ctx)
335 {
336 r300ContextPtr rmesa = R300_CONTEXT(ctx);
337 TNLcontext *tnl = TNL_CONTEXT(ctx);
338 struct vertex_buffer *vb = &tnl->vb;
339 GLuint nr;
340 GLuint count = vb->Count;
341 GLuint i;
342 GLuint InputsRead = 0, OutputsWritten = 0;
343 int *inputs = NULL;
344 int vir_inputs[VERT_ATTRIB_MAX];
345 GLint tab[VERT_ATTRIB_MAX];
346 int swizzle[VERT_ATTRIB_MAX][4];
347 struct r300_vertex_program *prog =
348 (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
349
350 if (hw_tcl_on) {
351 inputs = prog->inputs;
352 InputsRead = prog->key.InputsRead;
353 OutputsWritten = prog->key.OutputsWritten;
354 } else {
355 inputs = rmesa->state.sw_tcl_inputs;
356
357 DECLARE_RENDERINPUTS(render_inputs_bitset);
358 RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
359
360 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
361 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
362 assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
363
364 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
365 InputsRead |= 1 << VERT_ATTRIB_POS;
366 OutputsWritten |= 1 << VERT_RESULT_HPOS;
367 }
368
369 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
370 InputsRead |= 1 << VERT_ATTRIB_COLOR0;
371 OutputsWritten |= 1 << VERT_RESULT_COL0;
372 }
373
374 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) {
375 InputsRead |= 1 << VERT_ATTRIB_COLOR1;
376 OutputsWritten |= 1 << VERT_RESULT_COL1;
377 }
378
379 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
380 if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) {
381 InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
382 OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
383 }
384 }
385
386 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
387 if (InputsRead & (1 << i)) {
388 inputs[i] = nr++;
389 } else {
390 inputs[i] = -1;
391 }
392 }
393
394 if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
395 /* Fixed, apply to vir0 only */
396 memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int));
397 inputs = vir_inputs;
398 if (InputsRead & VERT_ATTRIB_POS)
399 inputs[VERT_ATTRIB_POS] = 0;
400 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
401 inputs[VERT_ATTRIB_COLOR0] = 2;
402 if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
403 inputs[VERT_ATTRIB_COLOR1] = 3;
404 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
405 if (InputsRead & (1 << i))
406 inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
407 }
408
409 RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
410 }
411
412 assert(InputsRead);
413 assert(OutputsWritten);
414
415 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
416 if (InputsRead & (1 << i)) {
417 tab[nr++] = i;
418 }
419 }
420
421 if (nr > R300_MAX_AOS_ARRAYS) {
422 return R300_FALLBACK_TCL;
423 }
424
425 for (i = 0; i < nr; i++) {
426 int ci, fix, found = 0;
427
428 swizzle[i][0] = SWIZZLE_ZERO;
429 swizzle[i][1] = SWIZZLE_ZERO;
430 swizzle[i][2] = SWIZZLE_ZERO;
431 swizzle[i][3] = SWIZZLE_ONE;
432
433 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
434 swizzle[i][ci] = ci;
435 }
436
437 if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
438 if (vb->AttribPtr[tab[i]]->stride % 4) {
439 return R300_FALLBACK_TCL;
440 }
441 rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
442 rmesa->state.aos[i].start = 0;
443 rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
444 rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
445 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
446 } else {
447 r300EmitVec(ctx, &rmesa->state.aos[i],
448 vb->AttribPtr[tab[i]]->data,
449 vb->AttribPtr[tab[i]]->size,
450 vb->AttribPtr[tab[i]]->stride, count);
451 }
452
453 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
454
455 for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
456 if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
457 continue;
458 }
459 found = 1;
460 break;
461 }
462
463 if (found) {
464 if (fix > 0) {
465 WARN_ONCE("Feeling lucky?\n");
466 }
467 rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
468 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
469 swizzle[i][ci] += fix;
470 }
471 } else {
472 WARN_ONCE
473 ("Cannot handle offset %x with stride %d, comp %d\n",
474 rmesa->state.aos[i].aos_offset,
475 rmesa->state.aos[i].aos_stride,
476 vb->AttribPtr[tab[i]]->size);
477 return R300_FALLBACK_TCL;
478 }
479 }
480
481 /* Setup INPUT_ROUTE. */
482 R300_STATECHANGE(rmesa, vir[0]);
483 ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
484 r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
485 vb->AttribPtr, inputs, tab, nr);
486
487 R300_STATECHANGE(rmesa, vir[1]);
488 ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
489 r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
490 nr);
491
492 /* Setup INPUT_CNTL. */
493 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
494 R300_STATECHANGE(rmesa, vic);
495 rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
496 rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
497
498 /* Setup OUTPUT_VTX_FMT. */
499 R300_STATECHANGE(rmesa, vof);
500 rmesa->hw.vof.cmd[R300_VOF_CNTL_0] =
501 r300VAPOutputCntl0(ctx, OutputsWritten);
502 rmesa->hw.vof.cmd[R300_VOF_CNTL_1] =
503 r300VAPOutputCntl1(ctx, OutputsWritten);
504
505 rmesa->state.aos_count = nr;
506
507 return R300_FALLBACK_NONE;
508 }
509
#ifdef USER_BUFFERS
/**
 * Call r300_mem_use() on the buffer behind the element DMA region and on
 * the buffer behind each of the first rmesa->state.aos_count AOS regions.
 * Regions without a buffer are skipped.
 */
void r300UseArrays(GLcontext * ctx)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int n;

	if (rmesa->state.elt_dma.buf)
		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);

	for (n = 0; n < rmesa->state.aos_count; n++) {
		struct r300_dma_region *region = &rmesa->state.aos[n];

		if (!region->buf)
			continue;

		r300_mem_use(rmesa, region->buf->id);
	}
}
#endif
525
526 void r300ReleaseArrays(GLcontext * ctx)
527 {
528 r300ContextPtr rmesa = R300_CONTEXT(ctx);
529 int i;
530
531 r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
532 for (i = 0; i < rmesa->state.aos_count; i++) {
533 r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
534 }
535 }