r300: Specify the type in the t_vir0 function.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_emit.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * \file
32 *
33 * \author Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "mtypes.h"
38 #include "colormac.h"
39 #include "imports.h"
40 #include "macros.h"
41 #include "image.h"
42
43 #include "swrast_setup/swrast_setup.h"
44 #include "math/m_translate.h"
45 #include "tnl/tnl.h"
46 #include "tnl/t_context.h"
47
48 #include "r300_context.h"
49 #include "radeon_ioctl.h"
50 #include "r300_state.h"
51 #include "r300_emit.h"
52 #include "r300_ioctl.h"
53
54 #ifdef USER_BUFFERS
55 #include "r300_mem.h"
56 #endif
57
/*
 * Compile-time guard.  r300EmitArrays() stores SWIZZLE_* values straight
 * into its swizzle table and t_swizzle() shifts those values into the
 * INPUT_ROUTE fields without any translation, so the SWIZZLE_* selectors
 * must be numerically equal to the R300_INPUT_ROUTE_SELECT_* encodings.
 */
#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
#error Cannot change these!
#endif

/* NOTE(review): DEBUG_ALL is not referenced in the visible part of this
 * file; confirm whether it is still needed. */
#define DEBUG_ALL DEBUG_VERTS
68
/*
 * COPY_DWORDS(dst, src, nr)
 *
 * Copy nr 32-bit words from src to dst and leave dst pointing just past
 * the last word written.
 *
 *   dst  modifiable int-sized-element pointer lvalue; it is advanced by nr.
 *   src  pointer to the source words (any pointer type).
 *   nr   number of 32-bit words to copy.
 *
 * The x86 variant uses "rep ; movsl" and writes the final destination
 * register back into dst.  The portable variant evaluates src and nr
 * exactly once, parenthesises its arguments, and uses suffixed local names
 * so that expressions such as "a ? b : c" or ones mentioning a caller
 * variable called j behave as expected.
 *
 * NOTE(review): the inline-asm variant is kept exactly as it was; its
 * (long) casts look 32-bit specific -- confirm before enabling
 * USE_X86_ASM on other targets.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	const int *copy_src_ = (const int *)(src);			\
	const int copy_nr_ = (nr);					\
	int copy_j_;							\
	for ( copy_j_ = 0 ; copy_j_ < copy_nr_ ; copy_j_++ )		\
		(dst)[copy_j_] = copy_src_[copy_j_];			\
	(dst) += copy_nr_;						\
} while (0)
#endif
88
89 static void r300EmitVec4(GLcontext * ctx,
90 struct r300_dma_region *rvb,
91 GLvoid * data, int stride, int count)
92 {
93 int i;
94 int *out = (int *)(rvb->address + rvb->start);
95
96 if (RADEON_DEBUG & DEBUG_VERTS)
97 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
98 __FUNCTION__, count, stride, (void *)out, (void *)data);
99
100 if (stride == 4)
101 COPY_DWORDS(out, data, count);
102 else
103 for (i = 0; i < count; i++) {
104 out[0] = *(int *)data;
105 out++;
106 data += stride;
107 }
108 }
109
110 static void r300EmitVec8(GLcontext * ctx,
111 struct r300_dma_region *rvb,
112 GLvoid * data, int stride, int count)
113 {
114 int i;
115 int *out = (int *)(rvb->address + rvb->start);
116
117 if (RADEON_DEBUG & DEBUG_VERTS)
118 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
119 __FUNCTION__, count, stride, (void *)out, (void *)data);
120
121 if (stride == 8)
122 COPY_DWORDS(out, data, count * 2);
123 else
124 for (i = 0; i < count; i++) {
125 out[0] = *(int *)data;
126 out[1] = *(int *)(data + 4);
127 out += 2;
128 data += stride;
129 }
130 }
131
132 static void r300EmitVec12(GLcontext * ctx,
133 struct r300_dma_region *rvb,
134 GLvoid * data, int stride, int count)
135 {
136 int i;
137 int *out = (int *)(rvb->address + rvb->start);
138
139 if (RADEON_DEBUG & DEBUG_VERTS)
140 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
141 __FUNCTION__, count, stride, (void *)out, (void *)data);
142
143 if (stride == 12)
144 COPY_DWORDS(out, data, count * 3);
145 else
146 for (i = 0; i < count; i++) {
147 out[0] = *(int *)data;
148 out[1] = *(int *)(data + 4);
149 out[2] = *(int *)(data + 8);
150 out += 3;
151 data += stride;
152 }
153 }
154
155 static void r300EmitVec16(GLcontext * ctx,
156 struct r300_dma_region *rvb,
157 GLvoid * data, int stride, int count)
158 {
159 int i;
160 int *out = (int *)(rvb->address + rvb->start);
161
162 if (RADEON_DEBUG & DEBUG_VERTS)
163 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
164 __FUNCTION__, count, stride, (void *)out, (void *)data);
165
166 if (stride == 16)
167 COPY_DWORDS(out, data, count * 4);
168 else
169 for (i = 0; i < count; i++) {
170 out[0] = *(int *)data;
171 out[1] = *(int *)(data + 4);
172 out[2] = *(int *)(data + 8);
173 out[3] = *(int *)(data + 12);
174 out += 4;
175 data += stride;
176 }
177 }
178
179 static void r300EmitVec(GLcontext * ctx,
180 struct r300_dma_region *rvb,
181 GLvoid * data, int size, int stride, int count)
182 {
183 r300ContextPtr rmesa = R300_CONTEXT(ctx);
184
185 if (stride == 0) {
186 r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
187 count = 1;
188 rvb->aos_offset = GET_START(rvb);
189 rvb->aos_stride = 0;
190 } else {
191 r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
192 rvb->aos_offset = GET_START(rvb);
193 rvb->aos_stride = size;
194 }
195
196 switch (size) {
197 case 1:
198 r300EmitVec4(ctx, rvb, data, stride, count);
199 break;
200 case 2:
201 r300EmitVec8(ctx, rvb, data, stride, count);
202 break;
203 case 3:
204 r300EmitVec12(ctx, rvb, data, stride, count);
205 break;
206 case 4:
207 r300EmitVec16(ctx, rvb, data, stride, count);
208 break;
209 default:
210 assert(0);
211 _mesa_exit(-1);
212 break;
213 }
214
215 }
216
217 /* dw: size, inputs, stop bit, type
218 *
219 * I'll create some documentation for t_vir0 and t_vir1 tomorrow and probably
220 * add the shifts as defines in r300_reg.h.
221 */
222 static GLuint t_vir0(uint32_t * dst, GLvector4f ** dt, int *inputs, GLint * tab, GLuint nr)
223 {
224 GLuint i, dw;
225
226 for (i = 0; i + 1 < nr; i += 2) {
227 dw = (dt[tab[i]]->size - 1) | (inputs[tab[i]] << 8) | (AOS_FORMAT_FLOAT << 14);
228 dw |= ((dt[tab[i + 1]]->size - 1) | (inputs[tab[i + 1]] << 8) | (AOS_FORMAT_FLOAT << 14)) << 16;
229 if (i + 2 == nr) {
230 dw |= (1 << (13 + 16));
231 }
232 dst[i >> 1] = dw;
233 }
234
235 if (nr & 1) {
236 dw = (dt[tab[nr - 1]]->size - 1) | (inputs[tab[nr - 1]] << 8) | (AOS_FORMAT_FLOAT << 14);
237 dw |= 1 << 13;
238 dst[nr >> 1] = dw;
239 }
240
241 return (nr + 1) >> 1;
242 }
243
244 static GLuint t_swizzle(int swizzle[4])
245 {
246 return (swizzle[0] << R300_INPUT_ROUTE_X_SHIFT) |
247 (swizzle[1] << R300_INPUT_ROUTE_Y_SHIFT) |
248 (swizzle[2] << R300_INPUT_ROUTE_Z_SHIFT) |
249 (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT);
250 }
251
252 static GLuint t_vir1(uint32_t * dst, int swizzle[][4], GLuint nr)
253 {
254 GLuint i;
255
256 for (i = 0; i + 1 < nr; i += 2) {
257 dst[i >> 1] = t_swizzle(swizzle[i]) | R300_INPUT_ROUTE_ENABLE;
258 dst[i >> 1] |= (t_swizzle(swizzle[i + 1]) | R300_INPUT_ROUTE_ENABLE) << 16;
259 }
260
261 if (nr & 1) {
262 dst[nr >> 1] = t_swizzle(swizzle[nr - 1]) | R300_INPUT_ROUTE_ENABLE;
263 }
264
265 return (nr + 1) >> 1;
266 }
267
268 static GLuint t_vic(GLcontext * ctx, GLuint InputsRead)
269 {
270 r300ContextPtr r300 = R300_CONTEXT(ctx);
271 GLuint i, vic_1 = 0;
272
273 if (InputsRead & (1 << VERT_ATTRIB_POS))
274 vic_1 |= R300_INPUT_CNTL_POS;
275
276 if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
277 vic_1 |= R300_INPUT_CNTL_NORMAL;
278
279 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
280 vic_1 |= R300_INPUT_CNTL_COLOR;
281
282 r300->state.texture.tc_count = 0;
283 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
284 if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
285 r300->state.texture.tc_count++;
286 vic_1 |= R300_INPUT_CNTL_TC0 << i;
287 }
288
289 return vic_1;
290 }
291
292 /* Emit vertex data to GART memory
293 * Route inputs to the vertex processor
294 * This function should never return R300_FALLBACK_TCL when using software tcl.
295 */
296
297 int r300EmitArrays(GLcontext * ctx)
298 {
299 r300ContextPtr rmesa = R300_CONTEXT(ctx);
300 r300ContextPtr r300 = rmesa;
301 TNLcontext *tnl = TNL_CONTEXT(ctx);
302 struct vertex_buffer *vb = &tnl->vb;
303 GLuint nr;
304 GLuint count = vb->Count;
305 GLuint i;
306 GLuint InputsRead = 0, OutputsWritten = 0;
307 int *inputs = NULL;
308 int vir_inputs[VERT_ATTRIB_MAX];
309 GLint tab[VERT_ATTRIB_MAX];
310 int swizzle[VERT_ATTRIB_MAX][4];
311
312 if (hw_tcl_on) {
313 struct r300_vertex_program *prog =
314 (struct r300_vertex_program *)
315 CURRENT_VERTEX_SHADER(ctx);
316 inputs = prog->inputs;
317 InputsRead = CURRENT_VERTEX_SHADER(ctx)->key.InputsRead;
318 OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
319 } else {
320 DECLARE_RENDERINPUTS(inputs_bitset);
321 inputs = r300->state.sw_tcl_inputs;
322
323 RENDERINPUTS_COPY(inputs_bitset,
324 TNL_CONTEXT(ctx)->render_inputs_bitset);
325
326 assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_POS));
327 InputsRead |= 1 << VERT_ATTRIB_POS;
328 OutputsWritten |= 1 << VERT_RESULT_HPOS;
329
330 assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_NORMAL)
331 == 0);
332
333 assert(RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR0));
334 InputsRead |= 1 << VERT_ATTRIB_COLOR0;
335 OutputsWritten |= 1 << VERT_RESULT_COL0;
336
337 if (RENDERINPUTS_TEST(inputs_bitset, _TNL_ATTRIB_COLOR1)) {
338 InputsRead |= 1 << VERT_ATTRIB_COLOR1;
339 OutputsWritten |= 1 << VERT_RESULT_COL1;
340 }
341
342 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
343 if (RENDERINPUTS_TEST
344 (inputs_bitset, _TNL_ATTRIB_TEX(i))) {
345 InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
346 OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
347 }
348
349 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
350 if (InputsRead & (1 << i))
351 inputs[i] = nr++;
352 else
353 inputs[i] = -1;
354
355 if (!
356 (r300->radeon.radeonScreen->
357 chip_flags & RADEON_CHIPSET_TCL)) {
358 /* Fixed, apply to vir0 only */
359 memcpy(vir_inputs, inputs,
360 VERT_ATTRIB_MAX * sizeof(int));
361 inputs = vir_inputs;
362
363 if (InputsRead & VERT_ATTRIB_POS)
364 inputs[VERT_ATTRIB_POS] = 0;
365
366 if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
367 inputs[VERT_ATTRIB_COLOR0] = 2;
368
369 if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
370 inputs[VERT_ATTRIB_COLOR1] = 3;
371
372 for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
373 if (InputsRead & (1 << i))
374 inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
375 }
376
377 RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset,
378 inputs_bitset);
379 }
380
381 assert(InputsRead);
382 assert(OutputsWritten);
383
384 for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++)
385 if (InputsRead & (1 << i))
386 tab[nr++] = i;
387
388 if (nr > R300_MAX_AOS_ARRAYS)
389 return R300_FALLBACK_TCL;
390
391 for (i = 0; i < nr; i++) {
392 int ci;
393 int comp_size, fix, found = 0;
394
395 swizzle[i][0] = SWIZZLE_ZERO;
396 swizzle[i][1] = SWIZZLE_ZERO;
397 swizzle[i][2] = SWIZZLE_ZERO;
398 swizzle[i][3] = SWIZZLE_ONE;
399
400 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++)
401 swizzle[i][ci] = ci;
402
403 if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
404 if (vb->AttribPtr[tab[i]]->stride % 4)
405 return R300_FALLBACK_TCL;
406
407 rmesa->state.aos[i].address =
408 (void *)(vb->AttribPtr[tab[i]]->data);
409 rmesa->state.aos[i].start = 0;
410 rmesa->state.aos[i].aos_offset =
411 r300GartOffsetFromVirtual(rmesa,
412 vb->
413 AttribPtr[tab[i]]->data);
414 rmesa->state.aos[i].aos_stride =
415 vb->AttribPtr[tab[i]]->stride / 4;
416
417 rmesa->state.aos[i].aos_size =
418 vb->AttribPtr[tab[i]]->size;
419 } else {
420 r300EmitVec(ctx, &rmesa->state.aos[i],
421 vb->AttribPtr[tab[i]]->data,
422 vb->AttribPtr[tab[i]]->size,
423 vb->AttribPtr[tab[i]]->stride, count);
424 }
425
426 rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
427
428 comp_size = _mesa_sizeof_type(GL_FLOAT);
429
430 for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
431 if ((rmesa->state.aos[i].aos_offset -
432 comp_size * fix) % 4)
433 continue;
434
435 found = 1;
436 break;
437 }
438
439 if (found) {
440 if (fix > 0) {
441 WARN_ONCE("Feeling lucky?\n");
442 }
443
444 rmesa->state.aos[i].aos_offset -= comp_size * fix;
445
446 for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++)
447 swizzle[i][ci] += fix;
448 } else {
449 WARN_ONCE
450 ("Cannot handle offset %x with stride %d, comp %d\n",
451 rmesa->state.aos[i].aos_offset,
452 rmesa->state.aos[i].aos_stride,
453 vb->AttribPtr[tab[i]]->size);
454 return R300_FALLBACK_TCL;
455 }
456 }
457
458 /* setup INPUT_ROUTE */
459 R300_STATECHANGE(r300, vir[0]);
460 ((drm_r300_cmd_header_t *) r300->hw.vir[0].cmd)->packet0.count =
461 t_vir0(&r300->hw.vir[0].cmd[R300_VIR_CNTL_0], vb->AttribPtr,
462 inputs, tab, nr);
463
464 R300_STATECHANGE(r300, vir[1]);
465 ((drm_r300_cmd_header_t *) r300->hw.vir[1].cmd)->packet0.count =
466 t_vir1(&r300->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, nr);
467
468 /* Set up input_cntl */
469 /* I don't think this is needed for vertex buffers, but it doesn't hurt anything */
470 R300_STATECHANGE(r300, vic);
471 r300->hw.vic.cmd[R300_VIC_CNTL_0] = 0x5555; /* Hard coded value, no idea what it means */
472 r300->hw.vic.cmd[R300_VIC_CNTL_1] = t_vic(ctx, InputsRead);
473
474 /* Stage 3: VAP output */
475
476 R300_STATECHANGE(r300, vof);
477
478 r300->hw.vof.cmd[R300_VOF_CNTL_0] = 0;
479 r300->hw.vof.cmd[R300_VOF_CNTL_1] = 0;
480
481 if (OutputsWritten & (1 << VERT_RESULT_HPOS))
482 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
483 R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
484
485 if (OutputsWritten & (1 << VERT_RESULT_COL0))
486 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
487 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT;
488
489 if (OutputsWritten & (1 << VERT_RESULT_COL1))
490 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
491 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
492
493 #if 0
494 if (OutputsWritten & (1 << VERT_RESULT_BFC0))
495 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
496 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT;
497
498 if (OutputsWritten & (1 << VERT_RESULT_BFC1))
499 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
500 R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
501
502 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
503 #endif
504
505 if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
506 r300->hw.vof.cmd[R300_VOF_CNTL_0] |=
507 R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
508
509 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
510 if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i)))
511 r300->hw.vof.cmd[R300_VOF_CNTL_1] |= (4 << (3 * i));
512
513 rmesa->state.aos_count = nr;
514
515 return R300_FALLBACK_NONE;
516 }
517
#ifdef USER_BUFFERS
/**
 * Call r300_mem_use() on every buffer that currently backs vertex or
 * element data: the element DMA buffer, if one is set, and the buffer of
 * each of the first rmesa->state.aos_count arrays that has one.
 *
 * \param ctx GL context.
 */
void r300UseArrays(GLcontext * ctx)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int aos;

	if (rmesa->state.elt_dma.buf != NULL) {
		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
	}

	for (aos = 0; aos < rmesa->state.aos_count; aos++) {
		if (rmesa->state.aos[aos].buf == NULL)
			continue;

		r300_mem_use(rmesa, rmesa->state.aos[aos].buf->id);
	}
}
#endif
533
534 void r300ReleaseArrays(GLcontext * ctx)
535 {
536 r300ContextPtr rmesa = R300_CONTEXT(ctx);
537 int i;
538
539 r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
540 for (i = 0; i < rmesa->state.aos_count; i++) {
541 r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
542 }
543 }